---
title: 'Figures'
author: David Hume
date: '`r format(Sys.time(), "%d %B %Y")`'
output: 
   html_document:
    toc: true
    toc_depth: 3 
editor_options: 
  chunk_output_type: console
---
---

```{r setup, message=FALSE, echo = FALSE}
library(tidyverse)
library(data.table)
library(Hmisc)
library(fasttime)
library(lubridate)
library(stringr)
library(cowplot)
library(car)
library(stargazer)
library(lfe)
library(knitr)
library(RColorBrewer)
library(epiDisplay)
library(scales)
options(width=200, scipen=10)
knitr::opts_chunk$set(echo = TRUE, cache = FALSE, warning = FALSE, 
                      message = FALSE, cache.lazy = FALSE)

```

<div style="line-height: 2em;">
<font size="4"> 

---

```{r cache=FALSE, echo = FALSE}
Time <- Sys.time()
# Set-up

# Switches & setting of variables

# Data set either "All8pc", "All1pc", "Test", "Render"
analysis.1 <- "Render"

analysis <- ifelse(analysis.1 =="Render", readRDS("./Data/Analysis.rds"), analysis.1)


# Save files "Y" or "N"
save <- "Y"


# Plots x-axis
highlimit <- .7
lowlimit <- -.4


cat("SWITCHES: DATA:", analysis, "  SAVE:", save)



Keep_cols <- c("TUI", "Property_Id", "Type", "New", "Quality100000", "Region", "date.val", "YrQtr", "sold.dummy", "IdxPriceA", "PriceA", "DurationVal", "DurationB", "Peak_price", "DurationPeak", "Val_EndA", "GrossProfitB", "Peak_date_start") 


#if (analysis.1!="Render") {
  if (analysis=="All") {
    f <- function(x, pos) subset(x, Type == Type) # i.e. reads in all data because condition always TRUE
      Resold.indexed <-  read_csv_chunked("./Data/Resold.indexed.All.csv", DataFrameCallback$new(f), chunk_size = 2000000)
      Resold.indexed <- as.data.table(Resold.indexed)
      Resold.indexed.regress <- Resold.indexed[, ..Keep_cols]
      Resold.indexed <- NULL
      
      } else {
        Resold.indexed.regress <-fread(paste0("./Data/Resold.indexed.", analysis, ".csv"), select = Keep_cols, na.strings=c("","NA"))
  }
#}



 
   Peak_prices <- fread(paste0("./Data/Peak_prices.", analysis, ".csv"))
   Peak_prices <- Peak_prices[,`:=`(date.val=as.Date(date.val, origin ="1970-01-01"),
                                 Peak_date_start=as.Date(Peak_date_start, origin ="1970-01-01"),
                                 New_peak_date=as.Date(New_peak_date, origin ="1970-01-01"),
                                 Peak_date_end_NA=as.Date(Peak_date_end_NA, origin ="1970-01-01"))]
  


#}
# Data set used for plotting rules
Rules <- fread("./Data/Rules.csv")
Rules <-  Rules[, date.val:=as.Date(date.val, origin ="1970-01-01")]
  

Resold.indexed.regress <- Resold.indexed.regress[,`:=`(SoldInPeriod =ifelse(sold.dummy,TRUE,FALSE),
                                                         Unrealgain=round(IdxPriceA-PriceA),
                                                         Peak.unrealgain=round(IdxPriceA-Peak_price))
                                                 ][,`:=`(UnrealgainYN=Unrealgain >=0, # Unrealised gain (True), unrealised loss (False)
                                                           UnrealgainAdj = Unrealgain/100000, 
                                                           Peak.unrealgainYN=Peak.unrealgain >=0,  
                                                           Peak.unrealgainAdj = Peak.unrealgain/100000,
                                                       Peak_unrealgain_pc= Peak.unrealgain/Peak_price*100)
                                                     ][,`:=`(Type=factor(Type), 
                                                             New=factor(New), 
                                                             YrQtr=factor(YrQtr),
                                                             Region=factor(Region),
                                                             date.val=as.Date(date.val, origin ="1970-01-01"),
                                                             #Peak_date_start=as.Date(Peak_date_start, origin ="1970-01-01"),
                                                             Val_qtr=factor(quarter(date.val)),
                                                             Val_year=factor(year(date.val)),
                                                             Return_purchase_pc = 10000000*UnrealgainAdj/PriceA,
                                                             Return_peak_pc = 10000000*Peak.unrealgainAdj/Peak_price)
                                                       ][,`:=`(Type = relevel(Type, "Flat"), New = relevel(New, "Old"))
                                                         ][order(Property_Id, date.val)] 






# Set variables
fig = 0 # Counter for figures

region.return.labels <- append(paste0("Quintile (neg) ",1:5),paste0("Quintile (pos) ",1:5))

Resold.indexed.regress <- Resold.indexed.regress[,Peak_date_start:= ifelse(Peak_date_start=="", NA, Peak_date_start)][, Peak_date_start := as.Date(Peak_date_start, origin ="1970-01-01")]

```


```{r cache=FALSE, eval=TRUE, echo= FALSE}

# Functions

Comma <- function(x){formatC(x, big.mark=",")}
DP2 <- function(x){format(round(x, 2),nsmall= 2)}


if(analysis == "All1pc"){

CutByReturn <-function(dt, quartile, multiplier) {
  # quartile - Either "All" or quartile name e.g. "1Q"
  if (quartile!="All") {
    dt <- dt[Region.bin==quartile,]
  }
  
  # Calculate the percentiles of return
  return.quantiles <- unique(quantile(dt[,UnrealgainAdj], probs=seq(0,1,.0025/multiplier), na.rm=TRUE))
  # Bin by return percentile
  dt <- dt[,return.bin:=cut2(UnrealgainAdj, return.quantiles)]
  return(dt)
}} else {CutByReturn <-function(dt, quartile, multiplier) {
  # quartile - Either "All" or quartile name e.g. "1Q"
  if (quartile!="All") {
    dt <- dt[Region.bin==quartile,]
  }
  
  # Calculate the percentiles of return
  return.quantiles <- unique(quantile(dt[,UnrealgainAdj], probs=seq(0,1,.0025/multiplier), na.rm=TRUE))
  # Bin by return percentile
  dt <- dt[,return.bin:=cut2(UnrealgainAdj, return.quantiles, include.lowest=TRUE)]
  return(dt)
}}


#### Function to Bin by unrealised  gain
BinByReturn<- function(dt, lowlimit, highlimit, quartile, quartilename, fit, multiplier){
  # dt - datatable
  # lowlimit - limit by minimum Unrealised gain, or  NA if no lower limit
  # highlimit - limit by maximum Unrealised gain, or  NA if no higher limit
  # quartile - subset by quartile or "All"
  # quartilename - name of quartile for faceting or "All"
  # fit - column name of fit[ted] data from predict[ion]() or NA
  # multiplier - where applicable when partitioning data by say All_index proportion that be.g increased in value
  if(!is.na(lowlimit)) dt = dt[UnrealgainAdj>=lowlimit,] # 
  if(!is.na(highlimit)) dt = dt[UnrealgainAdj<=highlimit,] # 
  dt <- CutByReturn(dt, quartile, multiplier)
  if (is.na(fit)) {
    dt <- dt[, .(bin.mean.gain=mean(UnrealgainAdj), proportion.sold=mean(sold.dummy), quartile=quartilename, bin.mean.duration=mean(DurationVal), bin.mean.durB=mean(DurationB)), by=.(return.bin)][order(bin.mean.gain),]
  } else{
    dt <- dt[, .(bin.mean.gain=mean(UnrealgainAdj), proportion.sold=mean(sold.dummy), quartile=quartilename, bin.mean.duration=mean(DurationVal), prob=mean(get(fit)), bin.mean.durB=mean(DurationB)),  by=.(return.bin)][order(bin.mean.gain),]
  }
}


# Rules for reference price - when it is purchase price or random price 2

Purchase_plot <- function(TUI_select, RefPrice) {
    Reference_price <- Rules[TUI==TUI_select, .SD[.N]][, .(Price = get(RefPrice), DoTA, date.val)][,DoTA:=as.Date(DoTA, origin ="1970-01-01")]
  ggplot() +
    geom_line(data = Rules[TUI==TUI_select,.(date.val, IdxPriceA)], aes(x= date.val, y=IdxPriceA), col="grey35", group=1) +
    geom_segment(data=  Reference_price, aes(x = DoTA, y = Price,  xend = date.val, yend = Price), col="green3",  alpha =0.75) +
    labs(x = "Date", y = "Valuation (£)") +
  theme(text = element_text(size=19, family="serif"),
        legend.position = "bottom",
        legend.title.align = .5 ,
        legend.text=element_text(size=19),
        panel.border = element_blank()  ,
        panel.grid = element_blank() ,
        panel.grid.minor = element_blank()) 
  }


# Rules for reference price - when it is random price 1

Random_plot <- function(TUI_select) {
  Reference_price <- Rules[TUI==TUI_select, .SD[.N]][, .(PriceA, Random_price1, DoTA, Random_price1_date, date.val)][,DoTA:=as.Date(DoTA, origin ="1970-01-01")][,Random_price1_date:=as.Date(Random_price1_date, origin ="1970-01-01")]
  ggplot() +
    geom_line(data = Rules[TUI==TUI_select,.(date.val, IdxPriceA)], aes(x= date.val, y=IdxPriceA), col="grey35", group=1)+
    geom_segment(data=  Reference_price, aes(x = DoTA, y = PriceA,  xend = Random_price1_date, yend = PriceA), col="green3",  alpha =0.75, arrow = arrow(ends="both", angle = 20, length = unit(1.5, "mm")))+
    geom_segment(data=  Reference_price, aes(x = Random_price1_date, y = Random_price1,  xend = date.val, yend = Random_price1), col="green3",  alpha =0.75, arrow = arrow(ends="both", angle = 20, length = unit(1.5, "mm"))) +
    labs(x = "Date",
         y = "Valuation (£)")
}

    

Peak_plot <- function(TUI_select, xstart_peakdate, xend_peakdate, yshift) {
    # TUI_select  <- "{662017D1-AECE-41FA-A32D-0263DEBB96B1}"
    # TUI_select  <- "{90518E6A-C3D3-45C1-84F0-EB1C182F3716}"
    #Purchase_price <- Peak_prices[TUI==TUI_select,][, .(Purchase_price=min(Purchase_price), Start=min(get(xstart_peakdate)), End=max(get(xend_peakdate)))]
    PurchaseDate <-unname(head(Rules[TUI==TUI_select,.(date.val)],1))
    Peak_price_sub <- Peak_prices[TUI==TUI_select][, Purchase_date := PurchaseDate][, Segment_end:= Peak_date_start+365.25*3/4]
  
    ggplot(data = Rules[TUI==TUI_select,.(date.val, IdxPriceA)], aes(x= date.val, y=IdxPriceA))+
      geom_line(col="dodgerblue3")+
      #geom_segment(data=Peak_price_sub, aes(x = Purchase_date, y = Peak_price, xend = Segment_end, yend = Peak_price),
      #             linetype = "ff", color="grey", size=.04)+
      geom_point(data = Peak_price_sub, aes(x=Peak_date_start, y=Peak_price), col="dodgerblue3", size=2) +
      #geom_segment(data=Peak_prices[TUI==TUI_select,], aes(x = Peak_date_start, y = Peak_price+yshift, xend = date.val, yend = Peak_price+yshift), col="black",  linetype="dotted", size=0.5) +
      #geom_segment(data=Peak_prices[TUI==TUI_select,], aes(x = get(xstart_peakdate), y = Peak_price, xend = get(xend_peakdate), yend = Peak_price), col="black",  alpha =0.75) +
      #geom_segment(data=  Purchase_price, aes(x = Start, y = Purchase_price,  xend = End, yend = Purchase_price), col="green3",  alpha =0.75) +
      labs(x="", y = "Valuation (£000)")+
      scale_x_date(labels = date_format("%Y-%m")) +
      scale_y_continuous(limits = c(400000, 1700000), 
                         breaks=seq(400000, 1700000, by =400000), 
                         labels =seq(400, 1700, by =400))+
      theme_bw() + theme_classic() +
      theme(text = element_text(size=19, family="serif"),
            legend.position = "bottom",
            legend.title.align = .5 ,
            legend.text=element_text(size=19),
            panel.border = element_blank()  ,
            panel.grid = element_blank() ,
            panel.grid.minor = element_blank()) 
    
  }
  
  
# Rules for reference point when comparing to properties in same postcode
Neighbour_plot <- function(TUI_select, yValue) {
  # TUI_select  <- "{00010A43-4AA6-4B68-B386-DF691D7CD14D}"
  # TUI_select  <- "{03BB0826-8B08-495D-9A78-0087566C8CD1}"
  ggplot()+
    geom_line(data = Rules[TUI==TUI_select,.(date.val, IdxPriceA)], aes(x= date.val, y=IdxPriceA), col="grey35", group=1) + 
    geom_point(data = When.sold[TUI==TUI_select & First_TUI==FALSE,], aes(x=Neighbour.reference.date, y=Neighbour.price), col="blue", pch = 9)+
    geom_point(data =When.sold[TUI==TUI_select & First_TUI==FALSE,], aes(x=Neighbour.reference.date, y=Neighbour.valuation), col="magenta", pch = 9)+
    geom_segment(data=When.sold[TUI==TUI_select,], aes(x = Neighbour.reference.date, y = get(yValue),  xend = Neighbour.date.end, yend = get(yValue)), col="green3",  alpha =0.75, arrow = arrow(ends="both", angle = 20, length = unit(1.5, "mm")))+
    geom_segment(data=When.sold[TUI==TUI_select,], aes(x = Neighbour.reference.date, y = Neighbour.valuation,  xend = Neighbour.reference.date, yend = Neighbour.reference), col="grey50",  lty ="dotted")+
    geom_segment(data=When.sold[TUI==TUI_select,], aes(x = Neighbour.reference.date, y = Neighbour.reference,  xend = Neighbour.reference.date, yend = Neighbour.price), col="grey50",  lty ="dotted")+
    labs(x = "Date",
         y = "Valuation (£)")
}


return_plot <- function(DT, AdjUnrealGain, xlabel) {
  ggplot(DT, aes(get(AdjUnrealGain))) + 
    geom_histogram(aes(y=..density..), binwidth = .005) +
    labs(x = paste0("Return since ", xlabel, " (£1,000)"),
         y = "Density")+
    scale_x_continuous(limits = c(lowlimit, highlimit), 
                       breaks=seq(lowlimit, highlimit, by =0.2), 
                       labels =seq(lowlimit*100, highlimit*100, by =20)) +  
    theme(axis.text.y = element_blank(),
          axis.ticks.y = element_blank()) 
}

dispos_effect_plot <- function(DT, AdjUnrealGain, Valuation.duration, xlabel) {
  data <- DT[, .(UnrealgainAdj = get(AdjUnrealGain), sold.dummy, DurationVal = get(Valuation.duration))][, DurationB := NA]
  data<-BinByReturn(data, lowlimit, highlimit, "All", "All", NA,1)
  dispos.effect <-ggplot(data, aes(x=bin.mean.gain)) + 
    geom_point(aes(y=proportion.sold), shape = 20) +
    scale_x_continuous(paste0("Return since ", xlabel, " (£1,000)"),
                       limits = c(lowlimit, highlimit), 
                       breaks=seq(lowlimit, highlimit, by =0.1), 
                       labels =seq(lowlimit*100, highlimit*100, by =10))+
    scale_y_continuous("Probability of sale",
                       limits = c(0, 0.035),
                       breaks=seq(0, 0.035, by =0.005),
                       labels =seq(0, 0.035, by =0.005)) +
    labs(subtitle ="Limits: x: -£40,000 -- +£70,000;    y: 0 -- 0.035")
}


dispos_effect_plot2 <- function(DT, AdjUnrealGain, Valuation.duration, xlabel, ylimit, tickmark) {
  # Much smaller effect than purchase price or peak price so need to provide y axis limit and number of tick marks
  # quantile() in the CutByReturn function does not give 200 plotting points so have to adjyst the multiplier in BinByReturn to increase the number 
  data <- DT[, .(UnrealgainAdj = get(AdjUnrealGain), sold.dummy, DurationVal = get(Valuation.duration))][, DurationB := NA]
  data<-BinByReturn(data, lowlimit, highlimit, "All", "All", NA,1)
  dispos.effect <-ggplot(data, aes(x=bin.mean.gain)) + 
    geom_point(aes(y=proportion.sold), shape = 20) +
    scale_x_continuous(paste0("Return since ", xlabel, " (£1,000)"),
                       limits = c(lowlimit, highlimit), 
                       breaks=seq(lowlimit, highlimit, by =0.1), 
                       labels =seq(lowlimit*100, highlimit*100, by =10))+
    scale_y_continuous("Probability of sale within 3 months",
                       limits = c(0, ylimit),
                       breaks=seq(0, ylimit, by =ylimit/tickmark),
                       labels =seq(0, ylimit, by =ylimit/tickmark)) 
}


interaction_plot <- function(DT, AdjUnrealGain, Interaction, Valuation.duration, xlabel, legendTitle, ylimit) {
  data <- DT[, .(UnrealgainAdj = get(AdjUnrealGain), sold.dummy, DurationVal = get(Valuation.duration), Interaction = get(Interaction))][, DurationB := NA]
  
  # Calculate proportion of observations where Interacting reference price is a gain so that number of plot markers is in proportion
  multiplierI <- nrow(data[UnrealgainAdj>=lowlimit & UnrealgainAdj<= highlimit & Interaction==TRUE,])/nrow(data[UnrealgainAdj>=lowlimit & UnrealgainAdj<= highlimit,])
  
  data_true <- BinByReturn(data[Interaction==TRUE,], lowlimit, highlimit, "All", "All", NA, multiplierI)
  data_true[, Label:= "Gain"]
  data_false <- BinByReturn(data[Interaction==FALSE,], lowlimit, highlimit, "All", "All", NA, (1- multiplierI))
  data_false[, Label:= "Loss"]
  data_all <- rbind(data_true, data_false)
  data_all <- data_all[, Label:= factor(Label, levels = c("Loss", "Gain"))] 
  
  dispos.effect <- ggplot(data_all, aes(x=bin.mean.gain, y=proportion.sold)) + 
    geom_point(aes(colour=Label), shape = 20) +
    scale_x_continuous(paste0("Return since ", xlabel, " (£1,000)"),
                       limits = c(lowlimit, highlimit), 
                       breaks=seq(lowlimit, highlimit, by =0.1), 
                       labels =seq(lowlimit*100, highlimit*100, by =10))+
    scale_y_continuous("Probability of sale within 3 months",
                       limits = c(0, ylimit),
                       breaks=seq(0, ylimit, by =0.005),
                       labels =seq(0, ylimit, by =0.005)) +
    labs(colour=legendTitle) +
    scale_color_manual(values = c("black", "grey60"))+ 
    theme(legend.position="bottom")
}


interaction_plot2 <- function(DT, AdjUnrealGain, Interaction, InteractionGain, ValuationDuration, xlabel, legendTitle, ylimit, plotLine) {
  data <- DT[, .(UnrealgainAdj = get(AdjUnrealGain), sold.dummy, DurationVal = get(ValuationDuration), Interaction = get(Interaction), InteractionGain=get(InteractionGain))][
    UnrealgainAdj>=lowlimit & UnrealgainAdj<= highlimit,][, DurationB := NA]
  
  # Calculate proportion of observations where Interacting reference price is a gain so that number of plot markers is in proportion
  multiplierI <- nrow(data[Interaction==TRUE,])/nrow(data)
  
  data_true <- data[Interaction==TRUE,][,median_return := median(InteractionGain)][, Band:=ifelse(InteractionGain>=median_return, "High", "Low")] # High positive value
  data_false <- data[Interaction==FALSE,][,median_return := median(InteractionGain)][, Band:=ifelse(InteractionGain>median_return, "Low", "High")] # High negative value
  
  data_true_high <- BinByReturn(data_true[Band=="High",], lowlimit, highlimit, "All", "All", NA, multiplierI/2)
  data_true_high[, Label:= "Gain (high)"]
  data_true_low <- BinByReturn(data_true[Band=="Low",], lowlimit, highlimit, "All", "All", NA, multiplierI/2)
  data_true_low[, Label:= "Gain (low)"]
  
  data_false_high <- BinByReturn(data_false[Band=="High",], lowlimit, highlimit, "All", "All", NA, (1-multiplierI)/2)
  data_false_high[, Label:= "Loss (high)"]
  data_false_low <- BinByReturn(data_false[Band=="Low",], lowlimit, highlimit, "All", "All", NA, (1-multiplierI)/2)
  data_false_low[, Label:= "Loss (low)"]
  data_all <- rbind(data_true_high, data_true_low, data_false_high, data_false_low)
  data_all <-   data_all[, Label:= factor(Label, levels = c("Loss (high)", "Loss (low)", "Gain (low)", "Gain (high)"))] 
  
  
  dispos_effect <- ggplot(data_all, aes(x=bin.mean.gain, y=proportion.sold)) 
    
  if (plotLine=="Y") {
    dispos_effect <-  dispos_effect + geom_line(aes(colour=Label))
  }
    
  dispos_effect <-  dispos_effect + geom_point(aes(colour=Label), shape = 20) +
    scale_x_continuous(paste0("Return since ", xlabel, " (£1,000)"),
                       limits = c(lowlimit, highlimit), 
                       breaks=seq(lowlimit, highlimit, by =0.1), 
                       labels =seq(lowlimit*100, highlimit*100, by =10))+
    scale_y_continuous("Probability of sale",
                       limits = c(0, ylimit),
                       breaks=seq(0, ylimit, by =0.005),
                       labels =seq(0, ylimit, by =0.005)) +
    labs(subtitle =paste0("Limits: x: -£40,000 -- +£70,000;    y: 0 -- ", ylimit), colour=legendTitle) +
    scale_color_brewer(palette = "RdYlGn") +
    theme(legend.position="bottom")
}


interaction_bin <- function(DT, multiplierI){
  data_all <- NULL
  Labels <- c("Low", "Med-low", "Med-high", "High")
  for (InteractionYN in c(TRUE, FALSE))
    {
    data <- DT[Interaction==InteractionYN,]
    label1 <- ifelse(InteractionYN==TRUE, "Gain: ", "Loss: ") 
    multiplier <- ifelse(InteractionYN==TRUE, multiplierI, 1-multiplierI)
    
    quantiles <- unique(quantile(data[, abs(InteractionGain)], probs=seq(0,1,.25), na.rm=TRUE)) # NB absolute value
    data <- data[, Band:=cut(abs(InteractionGain), quantiles, include.lowest=TRUE,  Labels)]

    for (label2 in Labels)
      {
      data_bin <- BinByReturn(data[Band== label2,], lowlimit, highlimit, "All", "All", NA, multiplier/4)
      data_bin <- data_bin[, Label:= paste0(label1, label2)]
      data_all <- rbind(data_all, data_bin)
    }
  }
  
  data_all <- data_all[, Label:=factor(Label, levels = c(paste0("Loss: ", rev(Labels)), paste0("Gain: ", Labels)))]
  
  return(data_all) 
}




interaction_plot4 <- function(DT, AdjUnrealGain, Interaction, InteractionGain, ValuationDuration, xlabel, legendTitle, ylimit, plotLine) {
  data <- DT[, .(UnrealgainAdj = get(AdjUnrealGain), sold.dummy, DurationVal = get(ValuationDuration), Interaction = get(Interaction), InteractionGain=get(InteractionGain))][
    UnrealgainAdj>=lowlimit & UnrealgainAdj<= highlimit,][, DurationB := NA]
  
  # Calculate proportion of observations where Interacting reference price is a gain so that number of plot markers is in proportion
  multiplierI <- nrow(data[Interaction==TRUE,])/nrow(data)
  
  data_all <- interaction_bin(data, multiplierI)
  

  
  dispos_effect <- ggplot(data_all, aes(x=bin.mean.gain, y=proportion.sold)) 
  
  if (plotLine=="Y") {
    dispos_effect <-  dispos_effect + geom_line(aes(colour=Label))
  }
  
  dispos_effect <-  dispos_effect + geom_point(aes(colour=Label), shape = 20) +
    scale_x_continuous(paste0("Return since ", xlabel, " (£1,000)"),
                       limits = c(lowlimit, highlimit), 
                       breaks=seq(lowlimit, highlimit, by =0.1), 
                       labels =seq(lowlimit*100, highlimit*100, by =10))+
    scale_y_continuous("Probability of sale",
                       limits = c(0, ylimit),
                       breaks=seq(0, ylimit, by =0.005),
                       labels =seq(0, ylimit, by =0.005)) +
    labs(subtitle =paste0("Limits: x: -£40,000 -- +£70,000;    y: 0 -- ", ylimit), colour=legendTitle) +
    scale_color_brewer(palette = "RdYlGn") +
    theme(legend.position="bottom", legend.text=element_text(size=rel(0.75)))
}


  



duration_facet_plot <- function(DT, AdjUnrealGain, Valuation.duration, xlabel,ylimit) {
  data <- DT[, .(UnrealgainAdj = get(AdjUnrealGain), sold.dummy, DurationVal = get(Valuation.duration))][, DurationB := NA][UnrealgainAdj>=lowlimit & UnrealgainAdj<= highlimit,]
  
  duration_quartiles <- unique(quantile(data[, DurationVal], probs=seq(0,1,.25), na.rm=TRUE))
  data.DT <- data[, Region.bin:=cut(DurationVal, duration_quartiles, include.lowest=TRUE, labels =  c("1 Shortest", "2 Second shortest", "3 Second longest", "4 Longest"))]
  
  data <- list(data.DT, duration_quartiles) 
  
  #ata_all <- NULL
  #for(val in c("Shortest", "Second shortest", "Second longest", "Longest")) {
  data_1 <- BinByReturn(data[[1]], lowlimit, highlimit, "1 Shortest","1 Shortest", NA, 0.25)
  data_2 <- BinByReturn(data[[1]], lowlimit, highlimit, "2 Second shortest", "2 Second shortest", NA, 0.25)
  data_3 <- BinByReturn(data[[1]], lowlimit, highlimit, "3 Second longest", "3 Second longest", NA, 0.25)
  data_4 <- BinByReturn(data[[1]], lowlimit, highlimit, "4 Longest", "4 Longest", NA, 0.25)
  data_all <- rbind(data_1,  data_2, data_3,  data_4)
  #}
  
  plot <- ggplot(data_all, aes(x=bin.mean.gain, y=proportion.sold)) + 
    geom_point(shape = 20) +
    scale_x_continuous(paste0("Return since ", xlabel, " (£1,000)"),
                       limits = c(lowlimit, highlimit), 
                       breaks=seq(lowlimit, highlimit, by =0.1), 
                       labels =seq(lowlimit*100, highlimit*100, by =10)) +
    scale_y_continuous("Probability of sale",
                       limits = c(0, ylimit),
                       breaks=seq(0, ylimit, by =0.01),
                       labels =seq(0, ylimit, by =0.01)) +
    labs(subtitle =paste0("Limits: x: -£40,000 -- +£70,000;    y: 0 -- ", ylimit)) +
    facet_wrap(. ~ quartile) 
  
  plot_quantile <- list(plot, data[[2]])
  
}


data_prepare <- function(DT, AdjUnrealGain, InteractionGainYN, InteractionGain, ValuationDuration){
  
  data <- DT[, .(UnrealgainAdj = get(AdjUnrealGain), sold.dummy, DurationVal = get(ValuationDuration), Interaction = get(InteractionGainYN), InteractionGain=get(InteractionGain))][
    UnrealgainAdj>=lowlimit & UnrealgainAdj<= highlimit,][, DurationB := NA]
  
  duration_quartiles <- unique(quantile(data[, DurationVal], probs=seq(0,1,.25), na.rm=TRUE))
  data <- data[, Region.bin:=cut(DurationVal, duration_quartiles, include.lowest=TRUE, labels =  c("1 Shortest", "2 Second shortest", "3 Second longest", "4 Longest"))]
  
  data_quantiles <- list(data, duration_quartiles) 
  
}




SplitByReturn <- function(DT, quartilename) {
  
  data <- DT[Region.bin==quartilename,]
  multiplierI <- nrow(data[Interaction==TRUE,])/(nrow(data)) 
 
  data_true <- BinByReturn(data[Interaction==TRUE,], lowlimit, highlimit, "All", quartilename, NA, multiplierI/4)
  data_true[, Label:= "Gain"]

  data_false <- BinByReturn(data[Interaction==FALSE,], lowlimit, highlimit, "All", quartilename, NA, (1-multiplierI)/4)
  data_false[, Label:= "Loss"]

  data_all <- rbind(data_true, data_false)
  data_all <- data_all[, `:=`(Label= factor(Label, levels = c("Loss", "Gain")))] 
  
}

  

SplitByReturn2 <- function(DT, quartilename) {

  data <- DT[Region.bin==quartilename,]
  multiplierI <- nrow(data[Interaction==TRUE,])/(nrow(data)) # 4 facets
  
  data_true <- data[Interaction==TRUE,][,median_return := median(InteractionGain)][, Band:=ifelse(InteractionGain>=median_return, "High", "Low")] # High positive value
  data_false <- data[Interaction==FALSE,][,median_return := median(InteractionGain)][, Band:=ifelse(InteractionGain>median_return, "Low", "High")] # High negative value
  
  data_true_high <- BinByReturn(data_true[Band=="High",], lowlimit, highlimit, "All", quartilename, NA, multiplierI/8) # 4 facets then high & low
  data_true_high[, Label:= "Gain (high)"]
  data_true_low <- BinByReturn(data_true[Band=="Low",], lowlimit, highlimit, "All", quartilename, NA, multiplierI/8)
  data_true_low[, Label:= "Gain (low)"]
  
  data_false_high <- BinByReturn(data_false[Band=="High",], lowlimit, highlimit, "All", quartilename, NA, (1-multiplierI)/8)
  data_false_high[, Label:= "Loss (high)"]
  data_false_low <- BinByReturn(data_false[Band=="Low",], lowlimit, highlimit, "All", quartilename, NA, (1-multiplierI)/8)
  data_false_low[, Label:= "Loss (low)"]
  data_all <- rbind(data_true_high, data_true_low, data_false_high, data_false_low)
  data_all <-   data_all[, `:=`(Label= factor(Label, levels = c("Loss (high)", "Loss (low)", "Gain (low)", "Gain (high)")))] 
  
}


facet_dispos_effect <- function(DT, xlabel, legendTitle, ylimit, plotLine) {
  
  dispos_effect <- ggplot(DT, aes(x=bin.mean.gain, y=proportion.sold))
  
  if (plotLine=="Y") {
  dispos_effect <-  dispos_effect + geom_line(aes(colour=Label))
}
  dispos_effect <-  dispos_effect + 
  geom_point(aes(colour=Label), shape = 20) +
  scale_x_continuous(paste0("Return since ", xlabel, " (£1,000)"),
                     limits = c(lowlimit, highlimit), 
                     breaks=seq(lowlimit, highlimit, by =0.1), 
                     labels =seq(lowlimit*100, highlimit*100, by =10)) +
  scale_y_continuous("Probability of sale",
                     limits = c(0, ylimit),
                     breaks=seq(0, ylimit, by =0.01),
                     labels =seq(0, ylimit, by =0.01)) +
  labs(subtitle =paste0("Limits: x: -£40,000 -- +£70,000;    y: 0 -- ", ylimit), colour=legendTitle) +
  theme(legend.position="bottom") +
  facet_wrap(. ~ quartile)

}


duration_facet_plot2 <- function(DT, AdjUnrealGain, InteractionGainYN, InteractionGain, ValuationDuration, xlabel, legendTitle, ylimit, plotLine) {
  data <- data_prepare(DT, AdjUnrealGain, InteractionGainYN, InteractionGain, ValuationDuration)
  
  data_1 <- SplitByReturn(data[[1]], "1 Shortest")
  data_2 <- SplitByReturn(data[[1]], "2 Second shortest")
  data_3 <- SplitByReturn(data[[1]], "3 Second longest")
  data_4 <- SplitByReturn(data[[1]], "4 Longest")
  data_all <- rbind(data_1, data_2, data_3, data_4)
  
  plot <- facet_dispos_effect(data_all, xlabel, legendTitle, ylimit, plotLine)
  
  plot <- plot +  
    scale_color_manual(values = c("red3", "green3"))
  
  plot_quantile <- list(plot, data[[2]])
}



duration_facet_plot3 <- function(DT, AdjUnrealGain, InteractionGainYN, InteractionGain, ValuationDuration, xlabel, legendTitle, ylimit, plotLine) {
  data <- data_prepare(DT, AdjUnrealGain, InteractionGainYN, InteractionGain, ValuationDuration)
  
  data_1 <- SplitByReturn2(data[[1]], "1 Shortest")
  data_2 <- SplitByReturn2(data[[1]], "2 Second shortest")
  data_3 <- SplitByReturn2(data[[1]], "3 Second longest")
  data_4 <- SplitByReturn2(data[[1]], "4 Longest")
  data_all <- rbind(data_1, data_2, data_3, data_4)
  
  plot <- facet_dispos_effect(data_all, xlabel, legendTitle, ylimit, plotLine)
  
  plot <- plot +
    scale_color_brewer(palette = "RdYlGn")
  
  plot_quantile <- list(plot, data[[2]])
}




plot_facet_2 <- function(dt, AdjUnrealgain, facColumn, fac1Condition, fac1Label, fac1Colour, fac2Condition, fac2Label, fac2Colour, xlabel, ylimit) {
  # everything in quotes except first dt
  # fac - facet
  # AdjUnrealgain - unrealgain/100000 e.g. "UnrealgainAd"
  # facColumn - datatable column holding the conditions ... e.g. "RegionUnrealgainYN"
  # fac1/ fac2Label - e.g. "Loss in region", "Gain in region"
  # fac1/ fac2Colour - "red3", "green3"
  # xlabel - x axis quantity e.g. purchase price
  # ylimit - max limit e.g. 0.085
  multiplierR <- sum(dt[get(AdjUnrealgain)>=lowlimit & get(AdjUnrealgain)<= highlimit, get(facColumn) ==fac1Condition])/dt[get(AdjUnrealgain)>=lowlimit & get(AdjUnrealgain)<= highlimit,.N]
  
  data.fac1<-BinByReturn(dt[get(facColumn)==fac1Condition,], lowlimit, highlimit, "All", "All", NA, multiplierR)
  data.fac1[,Label:= fac1Label]
  
  data.fac2<-BinByReturn(dt[get(facColumn)==fac2Condition,], lowlimit, highlimit, "All", "All", NA, (1-multiplierR))
  data.fac2[,Label:= fac2Label]
  data.all<-rbind(data.fac1,data.fac2)
  data.all <- data.all[, Label := factor(Label, levels= c(fac1Label, fac2Label))]
  
  dispos_effect_facet_2  <- ggplot(data.all, aes(x=bin.mean.gain)) + 
    geom_point(aes(y=proportion.sold, color = Label), shape = 20) +
    labs(x = paste0("Return since ", xlabel, " (£1,000)"),
         y = "Probability of sale",
         #color = "Valued by Region index:",
         subtitle ="Limits: x: -£40,000 -- +£70,000")+
    scale_x_continuous(limits = c(lowlimit, highlimit), 
                       breaks=seq(lowlimit, highlimit, by =0.1), 
                       labels =seq(lowlimit*100, highlimit*100, by =10))+
    scale_y_continuous(limits = c(0, ylimit),
                       breaks=seq(0, ylimit, by =0.01),
                       labels =seq(0, ylimit, by =0.01)) +
    scale_color_manual(values = c(fac1Colour, fac2Colour)) + 
    facet_grid(. ~ Label) + 
    theme(legend.position = "none")
}


regression_variables <- function(dt, Ref_price) {
  # NB Ref_price needs quotes
  
  dt[,F_unrealgain:=round(IdxPriceA-get(Ref_price)) # Calculates unrealised gain or loss - price paid vs index
   ][, F_unrealgainAdj:= F_unrealgain/100000 ]
  setnames(dt, c("F_unrealgain", "F_unrealgainAdj"), c(paste0(Ref_price, "_unrealgain"), paste0(Ref_price, "_unrealgainAdj"))) 
}




region_stats <- function(dt) {
  region <- round(dt[, unname(summary(Region))]/nrow(dt), 4)
    # summarise Region variable in DT, remove names London, etc, turn into proportion
  table <- data.table(Region = "East Midlands", Percentage = 100 * region[1])
  table <- rbind(table, data.table(Region = "East of England", Percentage = 100 * region[2]))
  table <- rbind(table, data.table(Region = "London", Percentage = 100 * region[3]))
  table <- rbind(table, data.table(Region = "North East", Percentage = 100 * region[4]))
  table <- rbind(table, data.table(Region = "North West", Percentage = 100 * region[5]))
  table <- rbind(table, data.table(Region = "South East", Percentage = 100 * region[6]))
  table <- rbind(table, data.table(Region = "South West", Percentage = 100 * region[7]))
  table <- rbind(table, data.table(Region = "Wales", Percentage = 100 * region[8]))
  table <- rbind(table, data.table(Region = "West Midlands", Percentage = 100 * region[9]))
  table <- rbind(table, data.table(Region = "Yorkshire and the Humber", Percentage = 100 * region[10]))
 return(table)
}



summary_stats <- function(dt) {
  type <- round(dt[, unname(summary(Type))]/nrow(dt), 4)
  QuantilePrice <- unname(quantile(dt[, Val_EndA], c(0.1, 0.25, 0.5, 0.75, 0.9), na.rm = TRUE))
  QuantileDuration <- unname(quantile(dt[, DurationB], c(0.1, 0.25, 0.5, 0.75, 0.9), na.rm = TRUE)) # Duration of each property's final purchase very likely to go beyond  dataset end date
  # summarise Type variable in DT, remove names Flat, Detached etc, turn into proportion
  
  table <- data.table(Description=c("Purchase Price (£)","Time to Sale (Years)", "BLANK", "Property Type", "Flat (%)", "Detached (%)", "Semi-Detached (%)", "Terraced (%)", "BLANK", "New-Build (%)", "BLANK", "N Property X Quarter"),
                      Mean=c(Comma(as.integer(mean(dt[, Val_EndA], na.rm = TRUE))), round(mean(dt[, DurationB], na.rm = TRUE),2), " ", " ", DP2(100*type[1]), DP2(100*type[2]), DP2(100*type[3]), DP2(100*type[4]), " ", DP2(100*nrow(dt[New=="New-build",])/nrow(dt)), " ", Comma(nrow(dt))),
                      SD=c(Comma(as.integer(sd(dt[, Val_EndA], na.rm = TRUE))), round(sd(dt[, DurationB], na.rm = TRUE),2), rep(" ", 10)),
                      P10=c(Comma(as.integer(QuantilePrice[1])), round(QuantileDuration[1],2), rep(" ", 10)),
                      P25=c(Comma(as.integer(QuantilePrice[2])), round(QuantileDuration[2],2), rep(" ", 10)),
                      P50=c(Comma(as.integer(QuantilePrice[3])), round(QuantileDuration[3],2), rep(" ", 10)),
                      P75=c(Comma(as.integer(QuantilePrice[4])), round(QuantileDuration[4],2), rep(" ", 10)),
                      P90=c(Comma(as.integer(QuantilePrice[5])), round(QuantileDuration[5],2), rep(" ", 10)))
  
  
  return(table)
}

return_summary <- function(dt, Ref_price) {
  # NB Ref_price needs quotes
  
  if (Ref_price=="PriceA") {Ref_event <- "Purchase"
  } else if (Ref_price=="Peak_price") {Ref_event <- "Peak"
  } else if (Ref_price=="Neighbour.valuation") {Ref_event <- "Neighbour Sale"
  } else { stop("PriceA, Peak_price, Neighbour.valuation") }
  
  dt <- dt[, .(IdxPriceA, Reference=get(Ref_price))][
    , GainGBP:=IdxPriceA-Reference][
      , `:=`(GainYN = ifelse(GainGBP>=0, TRUE, FALSE),
             GainPercent = 100*GainGBP/Reference)]
  
  QuantileGBP <- unname(quantile(dt[, GainGBP], na.rm = TRUE, c(0.1, 0.25, 0.5, 0.75, 0.9)))
  QuantilePercent <- unname(quantile(dt[, GainPercent], na.rm = TRUE, c(0.1, 0.25, 0.5, 0.75, 0.9)))
  
  
  if (Ref_price=="PriceA" | Ref_price=="Neighbour.valuation") {
    
    table <- data.table(Description=c(paste0("Return Since ", Ref_event, " (£) "),paste0("Return Since ", Ref_event, " (%) "), "BLANK", paste0("Return Since ", Ref_event, " >= 0 (%) ")),
                        Mean=c(Comma(as.integer(mean(dt[, GainGBP], na.rm = TRUE))), round(mean(dt[, GainPercent], na.rm = TRUE),2), " ", round(sum(dt[, GainYN])*100/nrow(dt),2)),
                        SD=c(Comma(as.integer(sd(dt[, GainGBP], na.rm = TRUE))), round(sd(dt[, GainPercent], na.rm = TRUE),2), " ", " "),
                        P10=c(Comma(as.integer(QuantileGBP[1])), round(QuantilePercent[1],2), " ", " "),
                        P25=c(Comma(as.integer(QuantileGBP[2])), round(QuantilePercent[2],2), " ", " "),
                        P50=c(Comma(as.integer(QuantileGBP[3])), round(QuantilePercent[3],2), " ", " "),
                        P75=c(Comma(as.integer(QuantileGBP[4])), round(QuantilePercent[4],2), " ", " "),
                        P90=c(Comma(as.integer(QuantileGBP[5])), round(QuantilePercent[5],2), " ", " "))
    
    
  } else { 
    N_peak <- Peak_prices[, .N, by =.(TUI)]
    QuantileNo <- unname(quantile(N_peak[, N], c(0.1, 0.25, 0.5, 0.75, 0.9)))
    
    table <- data.table(Description=c(paste0("Return Since ", Ref_event, " (£) "),paste0("Return Since ", Ref_event, " (%) "), "Price Peaks Per Property","BLANK", paste0("Return Since ", Ref_event, " >= 0 (%) ")),
                        Mean=c(Comma(as.integer(mean(dt[, GainGBP], na.rm = TRUE))), round(mean(dt[, GainPercent], na.rm = TRUE),2), round(mean(N_peak[, N], na.rm = TRUE),2), " ", round(sum(dt[, GainYN], na.rm = TRUE)*100/nrow(dt),2)),
                        SD=c(Comma(as.integer(sd(dt[, GainGBP], na.rm = TRUE))), round(sd(dt[, GainPercent], na.rm = TRUE),2), round(sd(N_peak[, N], na.rm = TRUE),2), " ", " "),
                        P10=c(Comma(as.integer(QuantileGBP[1])), round(QuantilePercent[1],2), round(QuantileNo[1],2), " ", " "),
                        P25=c(Comma(as.integer(QuantileGBP[2])), round(QuantilePercent[2],2), round(QuantileNo[2],2)," ", " "),
                        P50=c(Comma(as.integer(QuantileGBP[3])), round(QuantilePercent[3],2), round(QuantileNo[3],2)," ", " "),
                        P75=c(Comma(as.integer(QuantileGBP[4])), round(QuantilePercent[4],2), round(QuantileNo[4],2)," ", " "),
                        P90=c(Comma(as.integer(QuantileGBP[5])), round(QuantilePercent[5],2), round(QuantileNo[5],2)," ", " "))
    
  }
}

reference_difference_plot <- function(DT, Purchase_date, Alt_date, xaxis) {
  data <- DT[, Difference := 4 * round((get(Purchase_date)%--%get(Alt_date)/dyears(1)),2)]
  ggplot(DT, aes(Difference)) + 
    geom_histogram(aes(y=..density..), binwidth = 1) +
    labs(x = paste0("Difference between ", xaxis, " dates (quarters)"),
         y = "Density",
         subtitle ="Binwidth = 1 quarter")+  
    theme(axis.text.y = element_blank(),
          axis.ticks.y = element_blank()) 
}


  
  return_plot_pc <- function(DT, AdjUnrealGain_pc, xlabel, xlimit) {
    ggplot(DT, aes(get(AdjUnrealGain_pc))) + 
      geom_histogram(aes(y=..density..), binwidth = .05) +
      scale_x_continuous(limits = c(NA, xlimit))+
      labs(x = paste0("Return since ", xlabel, " (%)"),
           y = "Density")+theme(axis.text.y = element_blank(),
            axis.ticks.y = element_blank()) 
  }
  
  

return_freq <- function(DT, Gain, xlabel) {
  ggplot(DT, aes(get(Gain))) + 
    geom_histogram(binwidth = .01, fill="transparent",color="black") + # Bin width =£1,000 (1000/100000)
    labs(x = paste0("Return Since ", xlabel, " (£1,000)"),
         y = "Frequency") +
    scale_x_continuous(limits = c(lowlimit, highlimit), 
                       breaks=seq(lowlimit, highlimit, by =0.2),
                       labels =seq(lowlimit*100000, highlimit*100000, by =20000)) +
    theme_bw() + theme_classic() +
    theme(text = element_text(size=19, family="serif"),
          legend.position = "bottom",
          legend.title.align = .5 ,
          legend.text=element_text(size=19),
          panel.border = element_blank()  ,
          panel.grid = element_blank() ,
          panel.grid.minor = element_blank()) 
}

return_freq_pc <- function(DT, Gain_pc, xlabel, xlimit) {
  ggplot(DT, aes(get(Gain_pc))) + 
    geom_histogram(binwidth = 1, fill="transparent",color="black") +
    scale_x_continuous(limits = c(NA, xlimit))+
    labs(x = paste0("Return Since ", xlabel, " (%)"),
         y = "Frequency") +
    theme_bw() + theme_classic() +
    theme(text = element_text(size=19, family="serif"),
          legend.position = "bottom",
          legend.title.align = .5 ,
          legend.text=element_text(size=19),
          panel.border = element_blank()  ,
          panel.grid = element_blank() ,
          panel.grid.minor = element_blank()) 
}
 
  
if(analysis == "All1pc"){
  
  CutByReturnCI <-function(dt, quartile, multiplier) {
    # quartile - Either "All" or quartile name e.g. "1Q"
    if (quartile!="All") {
      dt <- dt[Region.bin==quartile,]
    }
    
    # Calculate the percentiles of return
    return.quantiles <- unique(quantile(dt[,UnrealgainAdj], probs=seq(0,1,.005/multiplier), na.rm=TRUE))
    # Bin by return percentile
    dt <- dt[,return.bin:=cut2(UnrealgainAdj, return.quantiles)]
    return(dt)
  }} else {CutByReturnCI <-function(dt, quartile, multiplier) {
    # quartile - Either "All" or quartile name e.g. "1Q"
    if (quartile!="All") {
      dt <- dt[Region.bin==quartile,]
    }
    
    # Calculate the percentiles of return
    return.quantiles <- unique(quantile(dt[,UnrealgainAdj], probs=seq(0,1,.005/multiplier), na.rm=TRUE))
    # Bin by return percentile
    dt <- dt[,return.bin:=cut2(UnrealgainAdj, return.quantiles, include.lowest=TRUE)]
    return(dt)
  }}

#### Function to Bin by unrealised  gain
BinByReturnCI<- function(dt, lowlimit, highlimit, quartile, quartilename, multiplier){
  # dt - datatable
  # lowlimit - limit by minimum Unrealised gain, or  NA if no lower limit
  # highlimit - limit by maximum Unrealised gain, or  NA if no higher limit
  # quartile - subset by quartile or "All"
  # quartilename - name of quartile for faceting or "All"
  # multiplier - where applicable when partitioning data by say All_index proportion that be.g increased in value
  if(!is.na(lowlimit)) dt = dt[UnrealgainAdj>=lowlimit,] # 
  if(!is.na(highlimit)) dt = dt[UnrealgainAdj<=highlimit,] # 
  dt <- CutByReturnCI(dt, quartile, multiplier)
  dt <- dt[, .(bin.mean.gain=mean(UnrealgainAdj), proportion.sold=mean(sold.dummy), sum=sum(sold.dummy), length=length(sold.dummy), quartile=quartilename, bin.mean.duration=mean(DurationVal)), by=.(return.bin)][
    order(bin.mean.gain),]

  CI <- data.table(ci.binomial(dt[,sum], dt[,length]))
  dt <- cbind(dt, CI[,.(exact.lower95ci, exact.upper95ci)])
  
  } 

dispos_effect_plotCI <- function(DT, AdjUnrealGain, Valuation.duration, xlabel, xlimit, ylimit) {
  # Much smaller effect than purchase price or peak price so need to provide y axis limit and number of tick marks
  # quantile() in the CutByReturn function does not give 200 plotting points so have to adjyst the multiplier in BinByReturn to increase the number 
  data <- DT[, .(UnrealgainAdj = get(AdjUnrealGain), sold.dummy, DurationVal = get(Valuation.duration))][, DurationB := NA]
  data<-BinByReturnCI(data, lowlimit, highlimit, "All", "All", 1)

  dispos.effect <-ggplot(data, aes(x=bin.mean.gain)) +
    geom_point(aes(y=proportion.sold),  pch=20)+ 
    geom_linerange(aes(ymin = exact.lower95ci, ymax = exact.upper95ci), size=0.25)+
    scale_x_continuous(paste0("Return Since ", xlabel, " (£1,000)"),
                       limits = c(lowlimit, highlimit), 
                       breaks=seq(lowlimit, highlimit, by =0.1), 
                       labels =seq(lowlimit*100, highlimit*100, by =10))+
    scale_y_continuous("Probability of Selling Property",
                       limits = c(xlimit, ylimit)) +
    scale_color_manual(values = c("black","gray59"))+
    theme_bw() + theme_classic() +
    theme(text = element_text(size=19, family="serif"),
          legend.position = "bottom",
          legend.title.align = .5 ,
          legend.text=element_text(size=19),
          panel.border = element_blank()  ,
          panel.grid = element_blank() ,
          panel.grid.minor = element_blank()) 
}





interaction_plotCI <- function(DT, AdjUnrealGain, Interaction, Valuation.duration, xlabel, legendTitle, xlimit, ylimit) {
  data <- DT[, .(UnrealgainAdj = get(AdjUnrealGain), sold.dummy, DurationVal = get(Valuation.duration), Interaction = get(Interaction))][, DurationB := NA]
  
  # Calculate proportion of observations where Interacting reference price is a gain so that number of plot markers is in proportion
  multiplierI <- nrow(data[UnrealgainAdj>=lowlimit & UnrealgainAdj<= highlimit & Interaction==TRUE,])/nrow(data[UnrealgainAdj>=lowlimit & UnrealgainAdj<= highlimit,])
  
  data_true <- BinByReturnCI(data[Interaction==TRUE,], lowlimit, highlimit, "All", "All", multiplierI)
  data_true[, Label:= "Gain = 1"]
  data_false <- BinByReturnCI(data[Interaction==FALSE,], lowlimit, highlimit, "All", "All", (1- multiplierI))
  data_false[, Label:= "Loss = 1"]
  data_all <- rbind(data_true, data_false)
  data_all <- data_all[, Label:= factor(Label, levels = c("Loss = 1", "Gain = 1"))] 
  
  dispos.effect <- ggplot(data_all, aes(x=bin.mean.gain, y=proportion.sold, colour=Label)) + 
    geom_point(shape = 20) +
    geom_linerange(aes(ymin = exact.lower95ci, ymax = exact.upper95ci), size=0.25)+
    scale_x_continuous(paste0("Return Since ", xlabel, " (£1,000)"),
                       limits = c(lowlimit, highlimit), 
                       breaks=seq(lowlimit, highlimit, by =0.1), 
                       labels =seq(lowlimit*100, highlimit*100, by =10))+
    scale_y_continuous("Probability of Selling Property",
                       limits = c(xlimit, ylimit)) +
    labs(colour=legendTitle) +
    scale_color_manual(values = c("lightslategray","black","lightslategray", "gray59"))+
    theme_bw() + theme_classic() +
    theme(text = element_text(size=19, family="serif"),
          legend.position = "bottom",
          legend.title.align = .5 ,
          legend.text=element_text(size=19),
          panel.border = element_blank()  ,
          panel.grid = element_blank() ,
          panel.grid.minor = element_blank()) 
}



residual_plot <- function(Peak.lm){
  Peak.res <- resid(Peak.lm)
  Peak.res.dt <- data.table(Residual =Peak.res)
  peak_residual <-  ggplot(Peak.res.dt) + 
    geom_histogram(aes(Residual.Peak.unrealgain), binwidth = 1000, fill="transparent",color="black") + # Bin width =£1,000 (1000/100000)
    labs(x = paste0("Return Since Peak (£)"),
         y = "Frequency") +
    scale_x_continuous(limits = c(-100000, 100000))+
    theme_bw() + theme_classic() +
    theme(text = element_text(size=19, family="serif"),
          legend.position = "bottom",
          legend.title.align = .5 ,
          legend.text=element_text(size=19),
          panel.border = element_blank()  ,
          panel.grid = element_blank() ,
          panel.grid.minor = element_blank()) 
}

residual_pc_plot <- function(Peak.pc.lm){
  Peak.res <- resid(Peak.pc.lm)
  Peak.res.dt <- data.table(Residual =Peak.res)
  peak_residual <- ggplot(Peak.res.dt) + 
    geom_histogram(aes(Residual.Peak_unrealgain_pc), binwidth = 1, fill="transparent",color="black") + # Bin width =£1,000 (1000/100000)
    labs(x = paste0("Return Since Peak (%)"),
         y = "Frequency") +
    scale_x_continuous(limits = c(-60, 60))+
    theme_bw() + theme_classic() +
    theme(text = element_text(size=19, family="serif"),
          legend.position = "bottom",
          legend.title.align = .5 ,
          legend.text=element_text(size=19),
          panel.border = element_blank()  ,
          panel.grid = element_blank() ,
          panel.grid.minor = element_blank()) 
  
}







   
  
  
```

# Summary statistics

```{r cache=FALSE, eval=TRUE, echo=FALSE}


SumStatsRandom <- summary_stats(Resold.indexed.regress)
SumStatsRandom


# SumStatsRandom is equivalent to SumStats..Incl3yr

if(save=="Y"){
  fwrite(SumStatsRandom, paste0("./Tables/SumStats",analysis,"Purchase.csv"))
}  

SumStatsRandomB <- summary_stats(Resold.indexed.regress[!is.na(Peak_price),])
SumStatsRandomB


# SumStatsRandom is equivalent to SumStats..Incl3yr

if(save=="Y"){
  fwrite(SumStatsRandomB, paste0("./Tables/SumStats",analysis,"Both.csv"))
}  


```


# Region summary


```{r cache=FALSE, eval=TRUE, echo=FALSE}

RegionRandom <- region_stats(Resold.indexed.regress)
RegionRandom

if(save=="Y"){
  fwrite(RegionRandom, paste0("./Tables/RegionStats",analysis,"Purchase.csv"))
}  


RegionRandomB <- region_stats(Resold.indexed.regress[!is.na(Peak_price),])
RegionRandomB

if(save=="Y"){
  fwrite(RegionRandomB, paste0("./Tables/RegionStats",analysis,"Both.csv"))
}  


```

# Return summary

```{r cache=FALSE, eval=TRUE, echo=FALSE}

PurchaseRetSumBase <- return_summary(Resold.indexed.regress[!is.na(Peak_price),], "PriceA")
print("Return summary: Purchase price")
PurchaseRetSumBase

print("Return summary: Peak price")
PeakRetSumBase <- return_summary(Resold.indexed.regress[!is.na(Peak_price),], "Peak_price")
PeakRetSumBase



if(save=="Y"){
  fwrite(PurchaseRetSumBase, paste0("./Tables/PurchaseRetSum", analysis, "Both.csv"))
  fwrite(PeakRetSumBase, paste0("./Tables/PeakRetSum", analysis, "Both.csv"))
} 


PurchaseRetSumBaseP <- return_summary(Resold.indexed.regress, "PriceA")
print("Return summary: Purchase price")
PurchaseRetSumBaseP



if(save=="Y"){
  fwrite(PurchaseRetSumBaseP, paste0("./Tables/PurchaseRetSum", analysis, "Purchase.csv"))

} 

```



```{r cache=FALSE, eval=FALSE, echo=FALSE}

print("Return summary: Neighbour price")
NeighbourRetSumBase <- return_summary(Resold.indexed.regress, "Neighbour.valuation")
NeighbourRetSumBase


  fwrite(NeighbourRetSumBase, paste0("./Tables/NeighbourRetSum", analysis, "Incl3yr.csv"))

```





# Purchase price (figures)

## Rule 

```{r cache=FALSE, eval=TRUE, echo=FALSE}

# Rule when reference price is purchase price

Purchase_plot1 <- Purchase_plot("{00D34FC5-8B2D-4854-A6A7-49036F273611}", "PriceA")
print(Purchase_plot1)

if (save =="Y") {
ggsave(paste0("./Plots/LaTeX/Purchase.price.pdf"), dpi=600)
ggsave(paste0("./Plots/Purchase.price.png"), dpi=600)
}



```



## Return since purchase.

```{r cache=FALSE, eval=TRUE, echo=FALSE}

# 1)  Figure 1: Histogram of return since purchase in £. Use the same x-axis width as in Figure 2 below. Choose a bin width that is easy to visually interpret, e.g. £5k. DONE BINWIDTH £1K

#Full sample
return_since_purchase <- return_plot(Resold.indexed.regress,"UnrealgainAdj", "purchase")
print(return_since_purchase)



if (save =="Y") {
ggsave(paste0("./Plots/LaTeX/return_since_purchase", analysis, "Purchase.pdf"), return_since_purchase, dpi=600)
ggsave(paste0("./Plots/return_since_purchase", analysis, "Purchase.png"), return_since_purchase, dpi=600)
}

fig = fig + 1
cat("Fig ",fig, "Return since purchase")


cat("Proportion of observations that are gains", nrow(Resold.indexed.regress[UnrealgainYN =="TRUE",])/nrow(Resold.indexed.regress))

return_since_purchase_pc <- return_plot_pc(Resold.indexed.regress,"Return_purchase_pc", "purchase", 100)
print(return_since_purchase_pc)

  
  if (save =="Y") {
    ggsave(paste0("./Plots/LaTeX/return_since_purchase_pc", analysis, "Purchase.pdf"), return_since_purchase_pc, dpi=600)
    ggsave(paste0("./Plots/return_since_purchase_pc", analysis, "Purchase.png"),  return_since_purchase_pc, dpi=600)
  }
  

Return_Purchase <- plot_grid(return_since_purchase, return_since_purchase_pc, align = "h", nrow =1)
print(Return_Purchase)
if (save =="Y") {
    ggsave(paste0("./Plots/LaTeX/return_purchase_density_grid", analysis, "Purchase.pdf"), Return_Purchase, dpi=600)
    ggsave(paste0("./Plots/return_purchase_density_grid", analysis, "Purchase.png"),  Return_Purchase, dpi=600)
  }




# Peak & purchase sub-sample (Both)
return_since_purchaseB <- return_plot(Resold.indexed.regress[!is.na(Peak_price),],"UnrealgainAdj", "purchase")
print(return_since_purchaseB)



if (save =="Y") {
ggsave(paste0("./Plots/LaTeX/return_since_purchase", analysis, "Both.pdf"), return_since_purchaseB, dpi=600)
ggsave(paste0("./Plots/return_since_purchase", analysis, "Both.png"), return_since_purchaseB, dpi=600)
}

fig = fig + 1
cat("Fig ",fig, "Return since purchase")


cat("Proportion of observations that are gains", nrow(Resold.indexed.regress[UnrealgainYN =="TRUE",])/nrow(Resold.indexed.regress))

return_since_purchase_pcB <- return_plot_pc(Resold.indexed.regress[!is.na(Peak_price),],"Return_purchase_pc", "purchase", 100)
print(return_since_purchase_pc)

  
  if (save =="Y") {
    ggsave(paste0("./Plots/LaTeX/return_since_purchase_pc", analysis, "Both.pdf"), return_since_purchase_pcB, dpi=600)
    ggsave(paste0("./Plots/return_since_purchase_pc", analysis, "Both.png"),  return_since_purchase_pcB, dpi=600)
  }
  


Return_Both <- plot_grid(return_since_purchaseB, return_since_purchase_pcB, align = "h", nrow =1)
print(Return_Both)
if (save =="Y") {
    ggsave(paste0("./Plots/LaTeX/return_purchase_density_grid", analysis, "Both.pdf"), Return_Both, dpi=600)
    ggsave(paste0("./Plots/return_purchase_density_grid", analysis, "Both.png"),  Return_Both, dpi=600)
  }



```


***

## Disposition effect plot 

```{r cache=FALSE, eval=FALSE, echo=FALSE}

# 2) Figure 2: Disposition effect plot (Figure 1 in the latest supervision html). Use the figure exactly as it is in the current plot, except i) start the x-axis range at -40; ii) label the x-axis 'Return Since Purchase' iii) double the number of dots DONE

dispos.effect <- dispos_effect_plot2(Resold.indexed.regress,"UnrealgainAdj", "DurationVal", "purchase", .025, 5)
print(dispos.effect)

if (save =="Y") {
ggsave(paste0("./Plots/LaTeX/dispos_effect", analysis, "Purchase.pdf"), dispos.effect, dpi=600)
ggsave(paste0("./Plots/dispos_effect", analysis, "Purchase.png"), dispos.effect, dpi=600)
}


dispos.effectB <- dispos_effect_plot2(Resold.indexed.regress[!is.na(Peak_price),],"UnrealgainAdj", "DurationVal", "purchase", .025, 5)
print(dispos.effectB)

if (save =="Y") {
ggsave(paste0("./Plots/LaTeX/dispos_effect", analysis, "Both.pdf"), dispos.effectB, dpi=600)
ggsave(paste0("./Plots/dispos_effect", analysis, "Both.png"), dispos.effectB, dpi=600)
}







fig = fig + 1
cat("Fig ",fig, "Probability of sale given return since purchase.")

```



```{r cache=FALSE, eval=TRUE, echo=FALSE}
dispose_effect_purchaseCI <- dispos_effect_plotCI(Resold.indexed.regress,"UnrealgainAdj", "DurationVal", "Purchase", 0.005, .016)
print(dispose_effect_purchaseCI)
ggsave(paste0("./Plots/LaTeX/dispos_effectCI", analysis, "Purchase.pdf"), dispose_effect_purchaseCI, dpi=600, width = 10, height = 5)
ggsave(paste0("./Plots/dispos_effectCI", analysis, "Purchase.png"), dispose_effect_purchaseCI, dpi=600, width = 10, height = 5)


dispose_effect_purchaseCI_B <- dispos_effect_plotCI(Resold.indexed.regress[!is.na(Peak_price),],"UnrealgainAdj", "DurationVal", "Purchase", 0.005, 0.0135)
print(dispose_effect_purchaseCI_B)
ggsave(paste0("./Plots/LaTeX/dispos_effectCI", analysis, "Both.pdf"), dispose_effect_purchaseCI_B, dpi=600, width = 10, height = 5)
ggsave(paste0("./Plots/dispos_effectCI", analysis, "Both.png"), dispose_effect_purchaseCI_B, dpi=600, width = 10, height = 5)


dispose_effect_peakCI <- dispos_effect_plotCI(Resold.indexed.regress[!is.na(Peak_price),],"Peak.unrealgainAdj", "DurationPeak", "Peak", 0.007, .014)
print(dispose_effect_peakCI)
ggsave(paste0("./Plots/LaTeX/dispos_effect_peakCI", analysis, "Both.pdf"), dispose_effect_peakCI, dpi=600, width = 10, height = 5)
ggsave(paste0("./Plots/dispos_effect_peakCI", analysis, "Both.png"), dispose_effect_peakCI, dpi=600, width = 10, height = 5)

interaction_plot_peakCI <- interaction_plotCI(Resold.indexed.regress[!is.na(Peak_price),],"UnrealgainAdj", "Peak.unrealgainYN", "DurationVal", "Purchase", "Return Since Peak", 0.005, 0.017)
print(interaction_plot_peakCI)

ggsave(paste0("./Plots/LaTeX/interaction_plot_peakCI", analysis, "Both.pdf"), interaction_plot_peakCI, dpi=600, width = 10, height = 5)
ggsave(paste0("./Plots/interaction_plot_peakCI", analysis, "Both.png"), interaction_plot_peakCI, dpi=600, width = 10, height = 5)
```




***


# Peak price (figures)

## Rules

For rule plots to work, random samples must contain correct TUI

```{r, cache=FALSE, echo = FALSE, out.width="75%", eval=TRUE}

# Different rules

print("a) red line is the testing period i.e.  a peak for at least 3 quarters;") 
print("b) blue line is the period where the peak is a reference point") 


Peak_price_plot1 <- Peak_plot("{662017D1-AECE-41FA-A32D-0263DEBB96B1}", "date.val", "New_peak_date",0)
print(Peak_price_plot1)
fig = fig + 1
cat("Fig ",fig, "Illustration of peak price rule. New reference peak at the end of successful test period")



if (save == "Y") {
  # Try Resold.indexed.regress.All1pcRules.csv & Resold.indexed.All1pcRules.csv Peak_prices.All1pcRules.csv
  ggsave("./Plots/LaTeX/Peak_price1.pdf", Peak_price_plot1, dpi=600, width = 10, height = 5)
  ggsave("./Plots/Peak_price1.png", Peak_price_plot1, dpi=600, width = 10, height = 5)
  

}



```


```{r, cache=FALSE, echo = FALSE, out.width="75%", eval=FALSE}

# Different rules




Peak_price_plot2 <- Peak_plot("{662017D1-AECE-41FA-A32D-0263DEBB96B1}", "Peak_date_start", "Peak_date_end_NA", 20000)
print(Peak_price_plot2)
#fig = fig + 1
#cat("Fig ",fig, "New reference peak at the beginning of successful test period")



Peak_price_plot3 <- Peak_plot("{662017D1-AECE-41FA-A32D-0263DEBB96B1}", "date.val", "Peak_date_end_NA", 0)
print(Peak_price_plot3)
#fig = fig + 1
#cat("Fig ",fig, "New reference peak at the end of successful test period, no reference point during successful test period")



if (save == "Y") {

  ggsave("./Plots/LaTeX/Peak_price2.pdf", Peak_price_plot2, dpi=600)
  ggsave("./Plots/Peak_price2.png", Peak_price_plot2, dpi=600)
  
  ggsave("./Plots/LaTeX/Peak_price3.pdf", Peak_price_plot3, dpi=600)
  ggsave("./Plots/Peak_price3.png", Peak_price_plot3, dpi=600)
  
}



```




***

## New reference peak at the end of successful test period



***


### Including all property x quarters in the first 3 years since first sale

***
```{r cache=FALSE, eval=TRUE, echo=FALSE}

return_since_peak <- return_plot(Resold.indexed.regress[!is.na(Peak_price),],"Peak.unrealgainAdj", "peak")
print(return_since_peak)

return_since_peak_pc <- return_plot_pc(Resold.indexed.regress[!is.na(Peak_price),],"Return_peak_pc", "peak", 50)
print(return_since_peak_pc)

fig = fig + 1
cat("Fig ",fig, "Return since purchase (10% sample). Including all property x quarters in the first 3 years since first sale")

Return_Peak <- plot_grid(return_since_peak, return_since_peak_pc, align = "h", nrow =1)
print(Return_Peak)
if (save =="Y") {
    ggsave(paste0("./Plots/LaTeX/return_density_grid", analysis, "Peak.pdf"), Return_Both, dpi=600)
    ggsave(paste0("./Plots/return_density_grid", analysis, "Peak.png"),  Return_Both, dpi=600)
  }




```

***
```{r cache=FALSE, eval=TRUE, echo=FALSE}

dispos.effect.peak <- dispos_effect_plot2(Resold.indexed.regress[!is.na(Peak_price),],"Peak.unrealgainAdj", "DurationPeak", "peak", .025, 5)
print(dispos.effect.peak)

fig = fig + 1
cat("Fig ",fig, "Probability of sale given return since purchase. Including all property x quarters in the first 3 years since first sale")

```



```{r cache=FALSE, eval=TRUE, echo=FALSE}

if (save =="Y") {

ggsave(paste0("./Plots/LaTeX/return_since_peak1", analysis, "Both.pdf"), return_since_peak, dpi=600)
ggsave(paste0("./Plots/return_since_peak1", analysis, "Both.png"), return_since_peak, dpi=600)

ggsave(paste0("./Plots/LaTeX/return_since_peak1_pc", analysis, "Both.pdf"), return_since_peak_pc, dpi=600)
ggsave(paste0("./Plots/return_since_peak1_pc", analysis, "Both.png"), return_since_peak_pc, dpi=600)
  
ggsave(paste0("./Plots/LaTeX/dispos_effect_peak1", analysis, "Both.pdf"), dispos.effect.peak, dpi=600)
ggsave(paste0("./Plots/dispos_effect_peak1", analysis, "Both.png"), dispos.effect.peak, dpi=600)
}


```


***



## Peak date distribution 


```{r cache=FALSE, eval=TRUE, echo=FALSE}

Peak_date <- Peak_prices[, .SD[1], by = .(TUI, Peak_date_start)]

peak_start<- ggplot(Peak_date, aes(Peak_date_start)) + 
  geom_bar() +
  labs(x = "Peak Start Date", y = "Number of Properties") +
  theme_bw() + theme_classic() +
  theme(text = element_text(size=19, family="serif"),
        legend.position = "bottom",
        legend.title.align = .5 ,
        legend.text=element_text(size=19),
        panel.border = element_blank()  ,
        panel.grid = element_blank() ,
        panel.grid.minor = element_blank()) 


print(peak_start)

ggsave(paste0("./Plots/LaTeX/peak_start", analysis, ".pdf"), peak_start, dpi=600)
ggsave(paste0("./Plots/peak_start", analysis, ".png"), peak_start, dpi=600)



```

## Return Since Peak Residual

Regress against year-quarter dummies (no constant) and plot the residuals. 

### Histogram
```{r cache=FALSE, eval=TRUE, echo=FALSE, out.width="75%"}

peak_residual <-  residual_plot(felm(Peak.unrealgain~ DurationPeak + YrQtr + 0, data=Resold.indexed.regress[!is.na(Peak_price),], na.action=na.exclude))

print(peak_residual)
ggsave(paste0("./Plots/LaTeX/peak_residual", analysis, ".pdf"), peak_residual, dpi=600)
ggsave(paste0("./Plots/peak_residual", analysis, ".png"), peak_residual, dpi=600)

```

```{r cache=FALSE, eval=FALSE, echo=FALSE, out.width="75%"}



peak_pc_residual <- residual_pc_plot(felm(Peak_unrealgain_pc~ DurationPeak + YrQtr + 0, data=Resold.indexed.regress[!is.na(Peak_price),], na.action=na.exclude))


print(peak_pc_residual)
ggsave(paste0("./Plots/LaTeX/peak_pc_residual", analysis, ".pdf"), peak_pc_residual, dpi=600)
ggsave(paste0("./Plots/peak_pc_residual", analysis, ".png"), peak_pc_residual, dpi=600)



```


***

### Including all property x quarters in the first 3 years since first sale

***
```{r cache=FALSE, eval=FALSE, echo=FALSE}

return_since_peak2 <- return_plot(Resold.indexed.regress,"Peak2.unrealgainAdj", "peak")
print(return_since_peak2)

fig = fig + 1

return_since_peak2_pc <- return_plot_pc(Resold.indexed.regress,"Return_peak2_pc", "peak", 50)
print(return_since_peak2_pc)

```

***
```{r cache=FALSE, eval=FALSE, echo=FALSE}

dispos.effect.peak2 <- dispos_effect_plot2(Resold.indexed.regress,"Peak2.unrealgainAdj", "DurationPeak2", "peak", .025, 5)
print(dispos.effect.peak2)

fig = fig + 1


```



```{r cache=FALSE, eval=FALSE, echo=FALSE}

if (save =="Y") {

ggsave(paste0("./Plots/return.since.peak2.", analysis, ".Incl3yr.pdf"), return_since_peak2, dpi=600)
ggsave(paste0("./Plots/return.since.peak2.", analysis, ".Incl3yr.png"), return_since_peak2, dpi=600)

ggsave(paste0("./Plots/return.since.peak2_pc.", analysis, ".Incl3yr.pdf"), return_since_peak2_pc, dpi=600)
ggsave(paste0("./Plots/return.since.peak2_pc.", analysis, ".Incl3yr.png"), return_since_peak2_pc, dpi=600)
  
ggsave(paste0("./Plots/dispos.effect.peak2.", analysis, ".Incl3yr.pdf"), dispos.effect.peak2, dpi=600)
ggsave(paste0("./Plots/dispos.effect.peak2.", analysis, ".Incl3yr.png"), dispos.effect.peak2, dpi=600)
}


```




***

## New reference peak at the end of successful test period, no reference point during successful test period




***

### Including all property x quarters in the first 3 years since first sale

***
```{r cache=FALSE, eval=FALSE, echo=FALSE}

return.since.peak3 <- return_plot(Resold.indexed.regress,"Peak3.unrealgainAdj", "peak")
print(return.since.peak3)

return_since_peak3_pc <- return_plot_pc(Resold.indexed.regress,"Return_peak3_pc", "peak", 50)
print(return_since_peak3_pc)

fig = fig + 1
cat("Fig ",fig, "Return since purchase (10% sample). Including all property x quarters in the first 3 years since first sale")

```

***
```{r cache=FALSE, eval=FALSE, echo=FALSE}

dispos.effect.peak3 <- dispos_effect_plot2(Resold.indexed.regress,"Peak3.unrealgainAdj", "DurationPeak3", "peak", .025, 5)
print(dispos.effect.peak3)

fig = fig + 1
cat("Fig ",fig, "Probability of sale given return since purchase. Including all property x quarters in the first 3 years since first sale")

```



```{r cache=FALSE, eval=FALSE, echo=FALSE}

if (save =="Y") {

ggsave(paste0("./Plots/return.since.peak3.", analysis, ".Incl3yr.pdf"), return.since.peak3, dpi=600)
ggsave(paste0("./Plots/return.since.peak3.", analysis, ".Incl3yr.png"), return.since.peak3, dpi=600)

ggsave(paste0("./Plots/return.since.peak3_pc.", analysis, ".Incl3yr.pdf"), return_since_peak3_pc, dpi=600)
ggsave(paste0("./Plots/return.since.peak3_pc.", analysis, ".Incl3yr.png"), return_since_peak3_pc, dpi=600)
  
ggsave(paste0("./Plots/dispos.effect.peak3.", analysis, ".Incl3yr.pdf"), dispos.effect.peak3, dpi=600)
ggsave(paste0("./Plots/dispos.effect.peak3.", analysis, ".Incl3yr.png"), dispos.effect.peak3, dpi=600)
}


```

# Interaction peak

```{r cache=FALSE, echo = FALSE, eval = TRUE}

interaction_plot_peak <- interaction_plot(Resold.indexed.regress[!is.na(Peak_price),],"UnrealgainAdj", "Peak.unrealgainYN", "DurationVal", "purchase", "Peak", 0.025)
print(interaction_plot_peak)


if (save =="Y") {

ggsave(paste0("./Plots/LaTeX/interaction_plot_peak", analysis, "Both.pdf"), interaction_plot_peak, dpi=600)
ggsave(paste0("./Plots/interaction_plot_peak", analysis, "Both.png"), interaction_plot_peak, dpi=600)
}


```



```{r}
Return_act_est <- ggplot(Resold.indexed.regress[SoldInPeriod ==TRUE], aes(x=GrossProfitB, y= Unrealgain)) + 
  geom_point(alpha = 0.1, shape=20, size=0.5) +
  scale_x_continuous(paste0("Estimated gain (£)"),
                     limits = c(-25000, 150000))+
  scale_y_continuous("Actual gain (£)",
                     limits = c(-25000, 150000))+
  labs(subtitle ="Limits: x,y: -£25,000 -- +£150,000")

if (save =="Y") {

ggsave(paste0("./Plots/LaTeX/Return_act_est_", analysis, "Purchase.pdf"), Return_act_est, dpi=600)
ggsave(paste0("./Plots/Return_act_est_", analysis, "Purchase.png"), Return_act_est, dpi=600)
}


Return_act_estB <- ggplot(Resold.indexed.regress[SoldInPeriod ==TRUE], aes(x=GrossProfitB, y= Unrealgain)) + 
  geom_point(alpha = 0.1, shape=20, size=0.5) +
  scale_x_continuous(paste0("Estimated gain (£)"),
                     limits = c(-25000, 150000))+
  scale_y_continuous("Actual gain (£)",
                     limits = c(-25000, 150000))+
  labs(subtitle ="Limits: x,y: -£25,000 -- +£150,000")

if (save =="Y") {

ggsave(paste0("./Plots/LaTeX/Return_act_est_", analysis, "Both.pdf"), Return_act_estB, dpi=600)
ggsave(paste0("./Plots/Return_act_est_", analysis, "Both.png"), Return_act_estB, dpi=600)
}


```


```{r cache=FALSE, echo = FALSE, eval = TRUE}
# Return frequency histograms

return_purchase_freq <- return_freq(Resold.indexed.regress,"UnrealgainAdj", "Purchase")
print(return_purchase_freq)
ggsave(paste0("./Plots/LaTeX/return_purchase_freq", analysis, "Purchase.pdf"), return_purchase_freq, dpi=600)
ggsave(paste0("./Plots/return_purchase_freq", analysis, "Purchase.png"), return_purchase_freq, dpi=600)


return_purchase_pc_freq <- return_freq_pc(Resold.indexed.regress,"Return_purchase_pc", "Purchase", 100)
print(return_purchase_pc_freq)
ggsave(paste0("./Plots/LaTeX/return_purchase_pc_freq", analysis, "Purchase.pdf"), return_purchase_pc_freq, dpi=600)
ggsave(paste0("./Plots/return_purchase_pc_freq", analysis, "Purchase.png"),  return_purchase_pc_freq, dpi=600)

return_purchase_both_freq <- return_freq(Resold.indexed.regress[!is.na(Peak_price),],"UnrealgainAdj", "Purchase")
print(return_purchase_both_freq)
ggsave(paste0("./Plots/LaTeX/return_purchase_freq", analysis, "Both.pdf"), return_purchase_both_freq, dpi=600)
ggsave(paste0("./Plots/return_purchase_freq", analysis, "Both.png"), return_purchase_both_freq, dpi=600)


return_purchase_pc_both_freq <- return_freq_pc(Resold.indexed.regress[!is.na(Peak_price),],"Return_purchase_pc", "Purchase", 100)
print(return_purchase_pc_both_freq)
ggsave(paste0("./Plots/LaTeX/return_purchase_pc_freq", analysis, "Both.pdf"), return_purchase_pc_both_freq, dpi=600)
ggsave(paste0("./Plots/return_purchase_pc_freq", analysis, "Both.png"),  return_purchase_pc_both_freq, dpi=600)


return_peak_freq <- return_freq(Resold.indexed.regress[!is.na(Peak_price),],"Peak.unrealgainAdj", "Peak")
print(return_peak_freq)
ggsave(paste0("./Plots/LaTeX/return_peak_freq", analysis, "Both.pdf"), return_peak_freq, dpi=600)
ggsave(paste0("./Plots/return_peak_freq", analysis, "Both.png"),  return_peak_freq, dpi=600)

return_peak_pc_freq <- return_freq_pc(Resold.indexed.regress[!is.na(Peak_price),],"Return_peak_pc", "Peak", 100)
print(return_peak_pc_freq)
ggsave(paste0("./Plots/LaTeX/return_peak_pc_freq", analysis, "Both.pdf"), return_peak_pc_freq, dpi=600)
ggsave(paste0("./Plots/return_peak_pc_freq", analysis, "Both.png"),  return_peak_pc_freq, dpi=600)


```


```{r cache=FALSE, echo = FALSE, eval = FALSE}



# Influence of neighbouring postcodes


## Rules

   # When.sold is used in plotting neighbour rules. Neighbour_plot function
  When.sold <- fread(paste0("./Data/When.sold.", analysis, ".csv"))
  When.sold <- When.sold[, `:=`(Neighbour.reference.date=as.Date(Neighbour.reference.date, origin ="1970-01-01"),
                              Neighbour.date.end=as.Date(Neighbour.date.end, origin ="1970-01-01"))]


#  If it doesnot work also check have correct When.sold flie


# Reference price is valuation when neighbour sells property
Neighbour.plot1 <- Neighbour_plot("{03BB0826-8B08-495D-9A78-0087566C8CD1}", "Neighbour.valuation")
print(Neighbour.plot1)
fig = fig + 1
cat("Fig ",fig, "Reference price is valuation when neighbour sells property")

# Reference price is price neighbour achieves when they sell their property
Neighbour.plot2 <- Neighbour_plot("{03BB0826-8B08-495D-9A78-0087566C8CD1}", "Neighbour.price")
print(Neighbour.plot2)   
fig = fig + 1
cat("Fig ",fig, "Reference price is price neighbour achieves when they sell their property")


# Reference price is higher of 1) valuation when neighbour sells property & 2) price neighbour achieves when they sell their property 
Neighbour.plot3 <- Neighbour_plot("{03BB0826-8B08-495D-9A78-0087566C8CD1}", "Neighbour.reference")
print(Neighbour.plot3)
fig = fig + 1
cat("Fig ",fig, "Reference price is higher of 1) valuation when neighbour sells property & 2) price neighbour achieves when they sell their property")


  

if (save == "Y" ) {
    # Try Resold.indexed.regress.All1pcRules.csv & Resold.indexed.All1pcRules.csv & Peak_prices.All1pcRules.csv
  ggsave("./Plots/Neighbour.plot1.pdf", Neighbour.plot1, dpi=600)
  ggsave("./Plots/Neighbour.plot1.png", Neighbour.plot1, dpi=600)
  
  #ggsave("./Plots/Neighbour.plot2.pdf", Neighbour.plot2, dpi=600)
  #ggsave("./Plots/Neighbour.plot2.png", Neighbour.plot2, dpi=600)
  
  #ggsave("./Plots/Neighbour.plot3.pdf", Neighbour.plot3, dpi=600)
  #ggsave("./Plots/Neighbour.plot3.png", Neighbour.plot3, dpi=600)
  
}




```


***

```{r cache=FALSE, echo = FALSE, eval = FALSE}

# Influence of neighbouring postcodes

# Reference price is valuation when neighbour sells property

return_since_peak_Neighbour_valuation <- return_plot(Resold.indexed.regress, "Neighbour.valuation.unrealgainAdj", "neighbour sale")
print(return_since_peak_Neighbour_valuation)
fig = fig + 1
cat("Fig ",fig, "Return is indexed purchase price less reference price. Reference price is valuation when neighbour sells property.")

#Since regularly resetting reference price to the observation property's valuation unsurprising large peak at 0. 

dispos_effect_peak_Neighbour_valuation <- dispos_effect_plot2(Resold.indexed.regress,"Neighbour.valuation.unrealgainAdj", "Duration.neighbour", "neighbour sale", .025, 5)
print(dispos_effect_peak_Neighbour_valuation)
fig = fig + 1
cat("Fig ",fig, "Probability of sale given return. Return is indexed purchase price less reference price. Reference price is valuation when neighbour sells property. NB Scale")






if (save == "Y") {
  
  ggsave(paste0("./Plots/return_since_Neighbour_valuation_", analysis, ".pdf"), return_since_peak_Neighbour_valuation, dpi=600)
  ggsave(paste0("./Plots/return_since_Neighbour_valuation_", analysis, ".png"), return_since_peak_Neighbour_valuation, dpi=600)

  ggsave(paste0("./Plots/dispos_effect_peak_Neighbour_valuation_", analysis,  ".pdf"), dispos_effect_peak_Neighbour_valuation, dpi=600)
  ggsave(paste0("./Plots/dispos_effect_peak_Neighbour_valuation_", analysis,  ".png"), dispos_effect_peak_Neighbour_valuation, dpi=600)
  
 
}




```


```{r cache=FALSE, echo = FALSE, eval = FALSE}


# Reference price is price neighbour achieves when they sell their property
 
return_since_peak_Neighbour_price <- return_plot(Resold.indexed.regress, "Neighbour.price.unrealgainAdj", "neighbour sale")
print(return_since_peak_Neighbour_price)
fig = fig + 1
cat("Fig ",fig, "Return is indexed purchase price less reference price. Reference price is the price the neighbour achieves when they sell their property.")


dispos_effect_peak_Neighbour_price <- dispos_effect_plot2(Resold.indexed.regress, "Neighbour.price.unrealgainAdj", "Duration.neighbour", 0.1, 10)
print(dispos_effect_peak_Neighbour_price)
fig = fig + 1
cat("Fig ",fig, "Probability of sale given return. Reference price is the price the neighbour achieves when they sell their property..")


# Reference price is higher of 1) valuation when neighbour sells property & 2) price neighbour achieves when they sell their property 

return_since_peak_Neighbour_reference <- return_plot(Resold.indexed.regress, "Neighbour.reference.unrealgainAdj", "neighbour sale")
print(return_since_peak_Neighbour_reference)
fig = fig + 1
cat("Fig ",fig, "Return is indexed purchase price less reference price. Reference price is higher of 1) valuation when neighbour sells property & 2) price neighbour achieves when they sell their property.")


dispos_effect_Neighbour_reference <- dispos_effect_plot2(Resold.indexed.regress, "Neighbour.reference.unrealgainAdj", "Duration.neighbour", 0.1, 10)
print(dispos_effect_Neighbour_reference)
fig = fig + 1
cat("Fig ",fig, "Probability of sale given return. Reference price is higher of 1) valuation when neighbour sells property & 2) price neighbour achieves when they sell their property.")


if (save == "Y") {
  
  ggsave(paste0("./Plots/return_since_peak_Neighbour_price_", analysis, ".pdf"), return_since_peak_Neighbour_price, dpi=600)
  ggsave(paste0("./Plots/return_since_peak_Neighbour_price_", analysis, ".png"), return_since_peak_Neighbour_price, dpi=600)
  
  ggsave(paste0("./Plots/dispos_effect_peak_Neighbour_price_", analysis, ".pdf"), dispos_effect_peak_Neighbour_price, dpi=600)
  ggsave(paste0("./Plots/dispos_effect_peak_Neighbour_price_", analysis, ".png"), dispos_effect_peak_Neighbour_price, dpi=600)
  
  ggsave(paste0("./Plots/return_since_peak_Neighbour_reference_", analysis, ".pdf"), return_since_peak_Neighbour_reference, dpi=600)
  ggsave(paste0("./Plots/return_since_peak_Neighbour_reference_", analysis, ".png"), return_since_peak_Neighbour_reference, dpi=600)
  
  ggsave(paste0("./Plots/dispos_effect_Neighbour_reference_", analysis, ".pdf"), dispos_effect_Neighbour_reference, dpi=600)
  ggsave(paste0("./Plots/dispos_effect_Neighbour_reference_", analysis, ".png"), dispos_effect_Neighbour_reference, dpi=600)
}




```



### Including all property x quarters in the first 3 years since first sale

```{r cache=FALSE, echo = FALSE, eval = FALSE}

# Influence of neighbouring postcodes
# Reference price is valuation when neighbour sells property

return_since_peak_Neighbour_valuation <- return_plot(Resold.indexed.regress, "Neighbour.valuation.unrealgainAdj", "neighbour sale")
print(return_since_peak_Neighbour_valuation)
fig = fig + 1
cat("Fig ",fig, "Return is indexed purchase price less reference price. Reference price is valuation when neighbour sells property. Including all property x quarters in the first 3 years since first sale.")

#Since regularly resetting reference price to the observation property's valuation unsurprising large peak at 0.
# Dominated by value at 0, which breaks the quantiles, so 2 dispos effect plots

dispos_effect_peak_Neighbour_valuation <-  dispos_effect_plot2(Resold.indexed.regress,"Neighbour.valuation.unrealgainAdj", "Duration.neighbour", "neighbour sale", .025, 5)
print(dispos_effect_peak_Neighbour_valuation)
fig = fig + 1
cat("Fig ",fig, "Probability of sale given return. Return is indexed purchase price less reference price. Reference price is valuation when neighbour sells property.  Including all property x quarters in the first 3 years since first sale.")






if (save == "Y") {
  
  ggsave(paste0("./Plots/return_since_Neighbour_valuation_", analysis, "_Incl3yr.pdf"), return_since_peak_Neighbour_valuation, dpi=600)
  ggsave(paste0("./Plots/return_since_Neighbour_valuation_", analysis, "_Incl3yr.png"), return_since_peak_Neighbour_valuation, dpi=600)

  ggsave(paste0("./Plots/dispos_effect_Neighbour_valuation_", analysis,  "_Incl3yr.pdf"), dispos_effect_peak_Neighbour_valuation, dpi=600)
  ggsave(paste0("./Plots/dispos_effect_Neighbour_valuation_", analysis,  "_Incl3yr.png"), dispos_effect_peak_Neighbour_valuation, dpi=600)
  
  
}


Sys.time() - Time

```



```{r cache=FALSE, echo = FALSE, eval = FALSE}

# Reference price is price neighbour achieves when they sell their property

return_since_peak_Neighbour_price <- return_plot(Resold.indexed.regress, "Neighbour.price.unrealgainAdj", "neighbour sale")
print(return_since_peak_Neighbour_price)
fig = fig + 1
cat("Fig ",fig, "Return is indexed purchase price less reference price. Reference price is the price the neighbour achieves when they sell their property. Excluding all property x quarters in the first 3 years since first sale.")


dispos_effect_peak_Neighbour_price <- dispos_effect_plot2(Resold.indexed.regress, "Neighbour.price.unrealgainAdj", "Duration.neighbour", 0.1, 10)
print(dispos_effect_peak_Neighbour_price)
fig = fig + 1
cat("Fig ",fig, "Probability of sale given return. Reference price is the price the neighbour achieves when they sell their property.  Excluding all property x quarters in the first 3 years since first sale.")


# Reference price is higher of 1) valuation when neighbour sells property & 2) price neighbour achieves when they sell their property 


return_since_peak_Neighbour_reference <- return_plot(Resold.indexed.regress, "Neighbour.reference.unrealgainAdj", "neighbour sale")
print(return_since_peak_Neighbour_reference)
fig = fig + 1
cat("Fig ",fig, "Return is indexed purchase price less reference price. Reference price is higher of 1) valuation when neighbour sells property & 2) price neighbour achieves when they sell their property.  Excluding all property x quarters in the first 3 years since first sale.")


dispos_effect_Neighbour_reference <- dispos_effect_plot2(Resold.indexed.regress, "Neighbour.reference.unrealgainAdj", "Duration.neighbour", 0.1, 10)
print(dispos_effect_Neighbour_reference)
fig = fig + 1
cat("Fig ",fig, "Probability of sale given return. Reference price is higher of 1) valuation when neighbour sells property & 2) price neighbour achieves when they sell their property.  Excluding all property x quarters in the first 3 years since first sale.")

if (save == "Y") {
  
  ggsave(paste0("./Plots/return_since_peak_Neighbour_price_", analysis, "_Incl3yr.pdf"), return_since_peak_Neighbour_price, dpi=600)
  ggsave(paste0("./Plots/return_since_peak_Neighbour_price_", analysis, "_Incl3yr.png"), return_since_peak_Neighbour_price, dpi=600)
  
  ggsave(paste0("./Plots/dispos_effect_peak_Neighbour_price_", analysis, "_Incl3yr.pdf"), dispos_effect_peak_Neighbour_price, dpi=600)
  ggsave(paste0("./Plots/dispos_effect_peak_Neighbour_price_", analysis, "_Incl3yr.png"), dispos_effect_peak_Neighbour_price, dpi=600)
  
  ggsave(paste0("./Plots/return_since_peak_Neighbour_reference_", analysis, "_Incl3yr.pdf"), return_since_peak_Neighbour_reference, dpi=600)
  ggsave(paste0("./Plots/return_since_peak_Neighbour_reference_", analysis, "_Incl3yr.png"), return_since_peak_Neighbour_reference, dpi=600)
  
  ggsave(paste0("./Plots/dispos_effect_Neighbour_reference_", analysis, "_Incl3yr.pdf"), dispos_effect_Neighbour_reference, dpi=600)
  ggsave(paste0("./Plots/dispos_effect_Neighbour_reference_", analysis, "_Incl3yr.png"), dispos_effect_Neighbour_reference, dpi=600)
}





```



# Interaction neighbour

```{r cache=FALSE, echo = FALSE, eval = FALSE}

interaction_plot_neighbour <- interaction_plot(Resold.indexed.regress,"UnrealgainAdj", "Neighbour.valuation.unrealgainYN", "DurationVal", "purchase", "Neighbour", 0.025)
print(interaction_plot_neighbour)


if (save =="Y") {
ggsave(paste0("./Plots/interaction_plot_neighbour_", analysis, "_Incl3yr.pdf"), interaction_plot_neighbour, dpi=600)
ggsave(paste0("./Plots/interaction_plot_neighbour_", analysis, "_Incl3yr.png"), interaction_plot_neighbour, dpi=600)
}




```


```{r cache=FALSE, echo = FALSE, eval = FALSE}

interaction_plot_neighbour2 <- interaction_plot2(Resold.indexed.regress,"UnrealgainAdj", "Neighbour.valuation.unrealgainYN", "Neighbour.valuation.unrealgainAdj", "DurationVal", "purchase", "Neighbour sale", 0.025, "N")

print(interaction_plot_neighbour2)


if (save =="Y") {
ggsave(paste0("./Plots/interaction_plot_neighbour2_", analysis, "_Incl3yr.pdf"), interaction_plot_neighbour2, dpi=600)
ggsave(paste0("./Plots/interaction_plot_neighbour2_", analysis, "_Incl3yr.png"), interaction_plot_neighbour2, dpi=600)
}


interaction_plot_neighbour4 <- interaction_plot4(Resold.indexed.regress,"UnrealgainAdj", "Neighbour.valuation.unrealgainYN", "Neighbour.valuation.unrealgainAdj", "DurationVal", "purchase", "Neighbour sale", 0.025, "Y")
print(interaction_plot_neighbour4)

if (save =="Y") {
ggsave(paste0("./Plots/interaction_plot_neighbour4_", analysis, "_Incl3yr.pdf"), interaction_plot_neighbour4, dpi=600)
ggsave(paste0("./Plots/interaction_plot_neighbour4_", analysis, "_Incl3yr.png"), interaction_plot_neighbour4, dpi=600)
}




```



# Duration 

```{r cache=FALSE, echo = FALSE, eval =FALSE}

duration_plot_purchase <- duration_facet_plot(Resold.indexed.regress,"UnrealgainAdj", "DurationVal", "purchase",  0.025)
print(duration_plot_purchase[[1]])
print(duration_plot_purchase[[2]])

duration_plot_peak <- duration_facet_plot(Resold.indexed.regress,"Peak.unrealgainAdj", "DurationPeak", "peak",  0.025)
print(duration_plot_peak[[1]])
print(duration_plot_peak[[2]])

duration_plot_neighbour <- duration_facet_plot(Resold.indexed.regress,"Neighbour.valuation.unrealgainAdj", "Duration.neighbour", "neighbour sale",  0.025)
print(duration_plot_neighbour[[1]])
print(duration_plot_neighbour[[2]])

if (save =="Y") {
ggsave(paste0("./Plots/duration_plot_purchase_", analysis, "_Incl3yr.pdf"), duration_plot_purchase[[1]], dpi=600)
ggsave(paste0("./Plots/duration_plot_purchase_", analysis, "_Incl3yr.png"), duration_plot_purchase[[1]], dpi=600)
saveRDS(duration_plot_purchase[[2]], paste0("./Plots/duration_plot_purchase_", analysis, "_Incl3yr.rds"))

ggsave(paste0("./Plots/duration_plot_peak_", analysis, "_Incl3yr.pdf"), duration_plot_peak[[1]], dpi=600)
ggsave(paste0("./Plots/duration_plot_peak_", analysis, "_Incl3yr.png"), duration_plot_peak[[1]], dpi=600)
saveRDS(duration_plot_peak[[2]], paste0("./Plots/duration_plot_peak_", analysis, "_Incl3yr.rds"))

ggsave(paste0("./Plots/duration_plot_neighbour_", analysis, "_Incl3yr.pdf"), duration_plot_neighbour[[1]], dpi=600)
ggsave(paste0("./Plots/duration_plot_neighbour_", analysis, "_Incl3yr.png"), duration_plot_neighbour[[1]], dpi=600)
saveRDS(duration_plot_neighbour[[2]], paste0("./Plots/duration_plot_neighbour_", analysis, "_Incl3yr.rds"))

}

```


# Duration interaction

```{r cache=FALSE, echo = FALSE, eval = FALSE}




duration_plot_peak_interaction1 <- duration_facet_plot2(Resold.indexed.regress,"UnrealgainAdj",  "Peak.unrealgainYN", "Peak.unrealgainAdj", "DurationVal", "purchase", "Peak",  0.025, "N")
print(duration_plot_peak_interaction1[[1]])
print(duration_plot_peak_interaction1[[2]])

duration_plot_peak_interaction2 <- duration_facet_plot3(Resold.indexed.regress,"UnrealgainAdj",  "Peak.unrealgainYN", "Peak.unrealgainAdj", "DurationVal", "purchase", "Peak",  0.025, "Y")
print(duration_plot_peak_interaction2[[1]])
print(duration_plot_peak_interaction2[[2]])



duration_plot_neighbour_interaction1 <- duration_facet_plot2(Resold.indexed.regress,"UnrealgainAdj",  "Neighbour.valuation.unrealgainYN", "Neighbour.valuation.unrealgainAdj", "DurationVal", "purchase", "Neighbour sale",  0.025, "Y")
print(duration_plot_neighbour_interaction1[[1]])
print(duration_plot_neighbour_interaction1[[2]])

duration_plot_neighbour_interaction2 <- duration_facet_plot3(Resold.indexed.regress,"UnrealgainAdj",  "Neighbour.valuation.unrealgainYN", "Neighbour.valuation.unrealgainAdj", "DurationVal", "purchase", "Neighbour sale",  0.03, "Y")
print(duration_plot_neighbour_interaction2[[1]])
print(duration_plot_neighbour_interaction2[[2]])



if (save =="Y") {

ggsave(paste0("./Plots/duration_purchase_peak1_", analysis, "_Incl3yr.pdf"), duration_plot_peak_interaction1[[1]], dpi=600)
ggsave(paste0("./Plots/duration_purchase_peak1_", analysis, "_Incl3yr.png"), duration_plot_peak_interaction1[[1]], dpi=600)
saveRDS(duration_plot_peak_interaction1[[2]], paste0("./Plots/duration_purchase_peak1_", analysis, "_Incl3yr.rds"))

ggsave(paste0("./Plots/duration_purchase_peak2_", analysis, "_Incl3yr.pdf"), duration_plot_peak_interaction2[[1]], dpi=600)
ggsave(paste0("./Plots/duration_purchase_peak2_", analysis, "_Incl3yr.png"), duration_plot_peak_interaction2[[1]], dpi=600)
saveRDS(duration_plot_peak_interaction2[[2]], paste0("./Plots/duration_purchase_peak2_", analysis, "_Incl3yr.rds"))

ggsave(paste0("./Plots/duration_purchase_neighbour1_", analysis, "_Incl3yr.pdf"), duration_plot_neighbour_interaction1[[1]], dpi=600)
ggsave(paste0("./Plots/duration_purchase_neighbour1_", analysis, "_Incl3yr.png"), duration_plot_neighbour_interaction1[[1]], dpi=600)
saveRDS(duration_plot_neighbour_interaction1[[2]], paste0("./Plots/duration_purchase_neighbour1_", analysis, "_Incl3yr.rds"))

ggsave(paste0("./Plots/duration_purchase_neighbour2_", analysis, "_Incl3yr.pdf"), duration_plot_neighbour_interaction2[[1]], dpi=600)
ggsave(paste0("./Plots/duration_purchase_neighbour2_", analysis, "_Incl3yr.png"), duration_plot_neighbour_interaction2[[1]], dpi=600)
saveRDS(duration_plot_neighbour_interaction2[[2]], paste0("./Plots/duration_purchase_neighbour2_", analysis, "_Incl3yr.rds"))

}


```


```{r cache=FALSE, echo = FALSE, eval = FALSE}


Neighbour_firstval <- reference_difference_plot(Resold.indexed.regress, "First_val", "Neighbour.reference.date", "purchase and neighbour sale")
print(Neighbour_firstval)

if (save == "Y") {
  ggsave(paste0("./Plots/Neighbour_firstval_", analysis, "_Incl3yr.png"), Neighbour_firstval, dpi =600)
  ggsave(paste0("./Plots/Neighbour_firstval_", analysis, "_Incl3yr.pdf"), Neighbour_firstval, dpi =600)

}


Neighbour_valdate <- reference_difference_plot(Resold.indexed.regress, "Neighbour.reference.date", "date.val", "valuation and neighbour sale")
print(Neighbour_valdate)

if (save == "Y") {
  ggsave(paste0("./Plots/Neighbour_valdate_", analysis, "_Incl3yr.png"), Neighbour_valdate, dpi =600)
  ggsave(paste0("./Plots/Neighbour_valdate_", analysis, "_Incl3yr.pdf"), Neighbour_valdate, dpi =600)
}



Peak_firstval <- reference_difference_plot(Resold.indexed.regress, "First_val", "Peak_date_start", "purchase and peak")
print(Peak_firstval)

if (save == "Y") {
  ggsave(paste0("./Plots/Peak_firstval_", analysis, "_Incl3yr.png"), Peak_firstval, dpi =600)
  ggsave(paste0("./Plots/Peak_firstval_", analysis, "_Incl3yr.pdf"), Peak_firstval, dpi =600)
}


Peak_valdate <- reference_difference_plot(Resold.indexed.regress, "Peak_date_start", "date.val", "valuation and peak")
print(Peak_valdate)


if (save == "Y") {
  ggsave(paste0("./Plots/Peak_valdate_", analysis, "_Incl3yr.png"), Peak_valdate, dpi =600)
  ggsave(paste0("./Plots/Peak_valdate_", analysis, "_Incl3yr.pdf"), Peak_valdate, dpi =600)
  
}


```



```{r cache=FALSE, echo = FALSE, eval = FALSE}

# gridplots


Rules_main <- plot_grid(Purchase_plot1, Peak_price_plot1, Neighbour.plot1, labels = c("A", "B", "C"), align = "h", nrow =1)
Rules_main

Rules_peak_Alt <- plot_grid(Peak_price_plot2, Peak_price_plot3, labels = c("A", "B"), align = "h")
  Rules_peak_Alt
  

return_purchase_grid <- plot_grid(return_since_purchase, return_since_purchase_x3, labels = c("A", "B"), align = "h", nrow =1)
return_purchase_grid 


if (save == "Y") {
  ggsave("./Plots/Rules_main.pdf", Rules_main, dpi =600)
  ggsave("./Plots/Rules_main.png", Rules_main, dpi =600)

  ggsave("./Plots/Rules_peak_Alt.pdf", Rules_peak_Alt, dpi =600)
  ggsave("./Plots/Rules_peak_Alt.png", Rules_peak_Alt, dpi =600)

  ggsave("./Plots/return_purchase_grid.pdf", return_purchase_grid, dpi =600)
  ggsave("./Plots/return_purchase_grid.png", return_purchase_grid, dpi =600)
  
  
}

```


```{cache=FALSE, echo = FALSE, eval = FALSE}

# Bin scatter example for presentations etc

# select an appropriate property with multiple buy and sells 
(Result <- Resold.indexed[, .N, by=.(Property_Id, TUI)][, .N, by = Property_Id][order(N)])
Subset <-  Resold.indexed[Property_Id=="8846893",]



# Bins reduced friom 400 to 4
if(analysis == "All1pc"){
  
  CutByReturn <-function(dt, quartile, multiplier) {
    # quartile - Either "All" or quartile name e.g. "1Q"
    if (quartile!="All") {
      dt <- dt[Region.bin==quartile,]
    }
    
    # Calculate the percentiles of return
    return.quantiles <- unique(quantile(dt[,UnrealgainAdj], probs=seq(0,1,.25), na.rm=TRUE))
    # Bin by return percentile
    dt <- dt[,return.bin:=cut2(UnrealgainAdj, return.quantiles)]
    return(dt)
  }} else {CutByReturn <-function(dt, quartile, multiplier) {
    # quartile - Either "All" or quartile name e.g. "1Q"
    if (quartile!="All") {
      dt <- dt[Region.bin==quartile,]
    }
    
    # Calculate the percentiles of return
    return.quantiles <- unique(quantile(dt[,UnrealgainAdj], probs=seq(0,1,.25), na.rm=TRUE))
    # Bin by return percentile
    dt <- dt[,return.bin:=cut2(UnrealgainAdj, return.quantiles, include.lowest=TRUE)]
    return(dt)
  }}



# Bin boundaries are hard coded and so will require updating with change in property (Cut off function after data<-BinByReturn(..) to determine boundaries)

dispos_effect_plot <- function(DT, AdjUnrealGain, Valuation.duration, xlabel) {
  data <- DT[, .(UnrealgainAdj = get(AdjUnrealGain), sold.dummy, DurationVal = get(Valuation.duration))][, DurationB := NA]
  data<-BinByReturn(data, lowlimit, highlimit, "All", "All", NA,1)
  dispos.effect <-ggplot(data, aes(x=bin.mean.gain)) +
    geom_point(data = DT, mapping = aes(x = UnrealgainAdj, y = sold.dummy), shape = 19, size = .75, colour ="magenta")+ 
    geom_point(aes(y=proportion.sold), shape = 19)+ 
    geom_vline(xintercept = c(-0.0405, 0.0855, 0.2532), linetype ="dotted") +
    scale_x_continuous(paste0("Return since ", xlabel, " (£1,000)"),
                       limits = c(lowlimit, highlimit), 
                       breaks=seq(lowlimit, highlimit, by =0.1), 
                       labels =seq(lowlimit*100, highlimit*100, by =10))+
    scale_y_continuous("Probability of sale",
                       limits = c(0, 1.05),
                       breaks=seq(0, 1, by =0.1),
                       labels =seq(0, 1, by =0.1)) +
    labs(subtitle ="Limits: x: -£40,000 -- +£70,000") 
}

bin_scatter <- dispos_effect_plot(Subset,"UnrealgainAdj", "DurationVal", "purchase")
print(dispos.effect)

ggsave(paste0("./Plots/bin_scatter.pdf"), bin_scatter, dpi=600)
ggsave(paste0("./Plots/bin_scatter.png"), bin_scatter, dpi=600)


```

#  Esimated gain - bias?


```{r cache=FALSE, eval=FALSE}

bias <- Resold.indexed[SoldInPeriod=="TRUE",.(PriceA, PriceB, RealGain=PriceB-PriceA, Unrealgain)][, Difference:=RealGain-Unrealgain]

ggplot(bias, aes(Difference)) +
  geom_histogram(aes(y=(..density..)), position='dodge', binwidth = 1000, fill="darkblue")+
  scale_x_continuous("Real gain - estimated gain from purchase to sale (£000). Binwidth = £1000", limits = c(-200000, 200000), breaks=seq(-200000, 200000, by =50000), labels =seq(-200, 200, by =50))+
  scale_y_continuous("Density") + 
  geom_density(colour="red")

fig = fig + 1
cat("Fig ",fig)

cat("Observations included in plots",round(100*length(bias[Difference>=-200000 & Difference<= 200000, Difference])/length(bias[,Difference]),2),"%") 

bias[, .(Mean = mean(Difference), SD = sd(Difference))]

```




***

```{r cache=FALSE, eval=TRUE, echo=FALSE}
Sys.time() - Time

```



